Ejemplo n.º 1
0
    def format(self, result):
        """
        Return the first column of the first formatted row, or False when
        the formatted result holds no rows.
        """
        rows = DBFormatter.format(self, result)

        if len(rows) > 0:
            return rows[0][0]
        return False
Ejemplo n.º 2
0
    def format(self, result):
        """
        Return the first column of the first formatted row, or False when
        the formatted result holds no rows.
        """
        rows = DBFormatter.format(self, result)

        if len(rows) > 0:
            return rows[0][0]
        return False
Ejemplo n.º 3
0
    def testBFormatting(self):
        """
        Test various formats

        Runs the select stored on the current thread once per DBFormatter
        output style (list of rows, single row, list of dicts, single dict)
        and compares each output with the expected values.
        """
        # current_thread() is the supported spelling; currentThread() is a
        # deprecated alias of it.
        myThread = threading.current_thread()
        dbformatter = DBFormatter(myThread.logger, myThread.dbi)
        myThread.transaction.begin()

        # All rows as a list of lists.
        result = myThread.transaction.processData(myThread.select)
        output = dbformatter.format(result)
        self.assertEqual(output, [['value1a', 'value2a'],
                                  ['value1b', 'value2b'],
                                  ['value1c', 'value2d']])

        # Only the first row.
        result = myThread.transaction.processData(myThread.select)
        output = dbformatter.formatOne(result)
        print('test1 ' + str(output))
        self.assertEqual(output, ['value1a', 'value2a'])

        # All rows as dictionaries.
        result = myThread.transaction.processData(myThread.select)
        output = dbformatter.formatDict(result)
        self.assertEqual(output, [{'bind2': 'value2a', 'bind1': 'value1a'},
                                  {'bind2': 'value2b', 'bind1': 'value1b'},
                                  {'bind2': 'value2d', 'bind1': 'value1c'}])

        # Only the first row, as a dictionary.
        result = myThread.transaction.processData(myThread.select)
        output = dbformatter.formatOneDict(result)
        self.assertEqual(output, {'bind2': 'value2a', 'bind1': 'value1a'})
Ejemplo n.º 4
0
    def testBFormatting(self):
        """
        Test various formats

        Runs the select stored on the current thread once per DBFormatter
        output style (list of rows, single row, list of dicts, single dict)
        and compares each output with the expected values.
        """
        # current_thread() is the supported spelling; currentThread() is a
        # deprecated alias of it.
        myThread = threading.current_thread()
        dbformatter = DBFormatter(myThread.logger, myThread.dbi)
        myThread.transaction.begin()

        # All rows as a list of lists.
        result = myThread.transaction.processData(myThread.select)
        output = dbformatter.format(result)
        self.assertEqual(output, [["value1a", "value2a"], ["value1b", "value2b"], ["value1c", "value2d"]])

        # Only the first row.
        result = myThread.transaction.processData(myThread.select)
        output = dbformatter.formatOne(result)
        print("test1 " + str(output))
        self.assertEqual(output, ["value1a", "value2a"])

        # All rows as dictionaries.
        result = myThread.transaction.processData(myThread.select)
        output = dbformatter.formatDict(result)
        self.assertEqual(
            output,
            [
                {"bind2": "value2a", "bind1": "value1a"},
                {"bind2": "value2b", "bind1": "value1b"},
                {"bind2": "value2d", "bind1": "value1c"},
            ],
        )

        # Only the first row, as a dictionary.
        result = myThread.transaction.processData(myThread.select)
        output = dbformatter.formatOneDict(result)
        self.assertEqual(output, {"bind2": "value2a", "bind1": "value1a"})
Ejemplo n.º 5
0
    def testBFormatting(self):
        """
        Test various formats

        Runs the select stored on the current thread once per DBFormatter
        output style (list of rows, single row, list of dicts, single dict)
        and compares each output with the expected values.
        """
        # current_thread() is the supported spelling; currentThread() is a
        # deprecated alias of it.
        myThread = threading.current_thread()
        dbformatter = DBFormatter(myThread.logger, myThread.dbi)
        myThread.transaction.begin()

        # All rows as a list of lists.
        result = myThread.transaction.processData(myThread.select)
        output = dbformatter.format(result)
        self.assertEqual(output, [['value1a', 'value2a'],
                                  ['value1b', 'value2b'],
                                  ['value1c', 'value2d']])

        # Only the first row.
        result = myThread.transaction.processData(myThread.select)
        output = dbformatter.formatOne(result)
        print('test1 ' + str(output))
        self.assertEqual(output, ['value1a', 'value2a'])

        # All rows as dictionaries.
        result = myThread.transaction.processData(myThread.select)
        output = dbformatter.formatDict(result)
        self.assertEqual(output, [{'bind2': 'value2a', 'bind1': 'value1a'},
                                  {'bind2': 'value2b', 'bind1': 'value1b'},
                                  {'bind2': 'value2d', 'bind1': 'value1c'}])

        # Only the first row, as a dictionary.
        result = myThread.transaction.processData(myThread.select)
        output = dbformatter.formatOneDict(result)
        self.assertEqual(output, {'bind2': 'value2a', 'bind1': 'value1a'})
Ejemplo n.º 6
0
    def format(self, result):
        """
        Return the first column of every formatted row as a flat list.
        """
        rows = DBFormatter.format(self, result)
        return [row[0] for row in rows]
Ejemplo n.º 7
0
    def format(self, results):
        """
        _format_

        Build a dictionary from the first formatted row: column 0 is stored
        under "fwjr_path" and column 1 under "taskName".
        """
        firstRow = DBFormatter.format(self, results)[0]

        return {"fwjr_path": firstRow[0], "taskName": firstRow[1]}
    def format(self, results):
        "Convert every formatted row into a tuple and return them as a list"
        rows = DBFormatter.format(self, results)
        return [tuple(row) for row in rows]
Ejemplo n.º 9
0
    def format(self, results):
        "Convert every formatted row into a tuple and return them as a list"
        rows = DBFormatter.format(self, results)
        return [tuple(row) for row in rows]
Ejemplo n.º 10
0
    def format(self, result):
        """
        Return the first column of every formatted row as a flat list.
        """
        rows = DBFormatter.format(self, result)
        return [row[0] for row in rows]
Ejemplo n.º 11
0
    def format(self, result):
        """
        Return the first column of every formatted row as a flat list.
        """
        rows = DBFormatter.format(self, result)
        return [row[0] for row in rows]
Ejemplo n.º 12
0
    def format(self, results):
        """
        _format_

        Build a dictionary from the first formatted row: column 0 is stored
        under "fwjr_path" and column 1 under "taskName".
        """
        firstRow = DBFormatter.format(self, results)[0]

        return {"fwjr_path": firstRow[0], "taskName": firstRow[1]}
Ejemplo n.º 13
0
    def format(self, results):
        """
        _format_

        Return the couch document ID (first column of the first row), or
        None when the query returned no rows.
        """
        rows = DBFormatter.format(self, results)

        if len(rows) != 0:
            return rows[0][0]

        return None
Ejemplo n.º 14
0
    def format(self, results):
        """
        _format_

        Return the couch document ID (first column of the first row), or
        None when the query returned no rows.
        """
        rows = DBFormatter.format(self, results)

        if len(rows) != 0:
            return rows[0][0]

        return None
Ejemplo n.º 15
0
    def format(self, results):
        """
        _format_

        Collapse the query results into a flat list holding the first column
        of each row.
        """
        rows = DBFormatter.format(self, results)
        return [row[0] for row in rows]
Ejemplo n.º 16
0
    def format(self, results):
        """
        _format_

        Turn each formatted row into a dictionary with column 0 under "id"
        and column 1 under "fwjr_path"; return the list of dictionaries.
        """
        rows = DBFormatter.format(self, results)

        return [{"id": row[0], "fwjr_path": row[1]} for row in rows]
Ejemplo n.º 17
0
    def format(self, results):
        """
        _format_

        Turn each formatted row into a dictionary with column 0 under "id"
        and column 1 under "fwjr_path"; return the list of dictionaries.
        """
        rows = DBFormatter.format(self, results)

        return [{"id": row[0], "fwjr_path": row[1]} for row in rows]
Ejemplo n.º 18
0
    def format(self, results):
        """
        _format_

        Collapse the query results into a flat list holding the first column
        of each row.
        """
        rows = DBFormatter.format(self, results)
        return [row[0] for row in rows]
Ejemplo n.º 19
0
    def format(self, results):
        """
        _format_

        Gather the jobgroup ids (first column of each row) into one list.
        """
        rows = DBFormatter.format(self, results)

        return [row[0] for row in rows]
Ejemplo n.º 20
0
    def format(self, result):
        """
        _format_

        Gather the subscription types (first column of each row) into one
        list.
        """
        rows = DBFormatter.format(self, result)

        return [row[0] for row in rows]
Ejemplo n.º 21
0
    def format(self, results):
        """
        _format_

        Reduce the query result to a single list of LFNs, taken from the
        first column of each row.
        """
        rows = DBFormatter.format(self, results)

        return [row[0] for row in rows]
Ejemplo n.º 22
0
    def format(self, result):
        """
        _format_

        Gather the subscription types (first column of each row) into one
        list.
        """
        rows = DBFormatter.format(self, result)

        return [row[0] for row in rows]
Ejemplo n.º 23
0
    def format(self, results):
        """
        _format_

        Gather the jobgroup ids (first column of each row) into one list.
        """
        rows = DBFormatter.format(self, results)

        return [row[0] for row in rows]
Ejemplo n.º 24
0
    def format(self, result):
        """
        _format_

        Gather the first column of each row into one list.
        """
        rows = DBFormatter.format(self, result)

        return [row[0] for row in rows]
Ejemplo n.º 25
0
    def format(self, results):
        """
        _format_

        Reduce the query result to a single list of LFNs, taken from the
        first column of each row.
        """
        rows = DBFormatter.format(self, results)

        return [row[0] for row in rows]
Ejemplo n.º 26
0
    def format(self, results):
        """
        _format_

        Flatten every column of every row into one list of job group IDs,
        converting each value with int().
        """
        rows = DBFormatter.format(self, results)

        return [int(value) for row in rows for value in row]
Ejemplo n.º 27
0
    def format(self, results):
        """
        _format_

        Reduce the rows returned by the query to a single list of open
        fileset names: the first column of each row, converted with str().
        """
        rows = DBFormatter.format(self, results)

        return [str(row[0]) for row in rows]
Ejemplo n.º 28
0
    def format(self, results):
        """
        _format_

        Reduce the rows returned by the query to a single list of open
        fileset names: the first column of each row, converted with str().
        """
        rows = DBFormatter.format(self, results)

        return [str(row[0]) for row in rows]
Ejemplo n.º 29
0
    def format(self, results):
        """
        _format_

        Flatten every column of every row into one list of job group IDs,
        converting each value with int().
        """
        rows = DBFormatter.format(self, results)

        return [int(value) for row in rows for value in row]
Ejemplo n.º 30
0
    def format(self, result):
        """
        Keep only the rows whose task name (second column) splits on '/'
        into at most three pieces, i.e. contains at most two slashes, so
        that those tasks can be declared as top level tasks.
        :param result: raw query result handed to DBFormatter.format
        :return: a list of subscriptions id (first column of the kept rows)
        """
        rows = DBFormatter.format(self, result)

        # A leading '/' yields an empty first piece, hence the limit of 3.
        return [row[0] for row in rows if len(row[1].split('/')) <= 3]
Ejemplo n.º 31
0
    def execute(self, jobID, conn = None, transaction = False):
        """
        Run inputSQL for the given job and return self.format() of the
        result. When the second column of the first row is 0, parentSQL is
        run for the same job and its result is formatted instead. Returns an
        empty list when the first query produced no rows.
        """
        def runQuery(sql):
            # A fresh binds dictionary is built for every query.
            return self.dbi.processData(sql, {"job": jobID}, conn = conn,
                                        transaction = transaction)

        result = runQuery(self.inputSQL)
        rows = DBFormatter.format(self, result)

        if len(rows) == 0:
            return []

        # The input to the job consisted of unmerged files, so we'll need
        # to query for the parents of the job's input.
        if int(rows[0][1]) == 0:
            result = runQuery(self.parentSQL)

        return self.format(result)
Ejemplo n.º 32
0
    def execute(self, jobID, conn=None, transaction=False):
        """
        Run inputSQL for the given job and return self.format() of the
        result. When the second column of the first row is 0, parentSQL is
        run for the same job and its result is formatted instead. Returns an
        empty list when the first query produced no rows.
        """
        def runQuery(sql):
            # A fresh binds dictionary is built for every query.
            return self.dbi.processData(sql, {"job": jobID},
                                        conn=conn,
                                        transaction=transaction)

        result = runQuery(self.inputSQL)
        rows = DBFormatter.format(self, result)

        if len(rows) == 0:
            return []

        # The input to the job consisted of unmerged files, so we'll need
        # to query for the parents of the job's input.
        if int(rows[0][1]) == 0:
            result = runQuery(self.parentSQL)

        return self.format(result)
Ejemplo n.º 33
0
    def select(self, query):
        """
        Connect the session, run the query and return its rows formatted by
        a DBFormatter, or None when the row count is not positive.
        """
        # db connect
        self.session.connect()

        # -> WMCore.Database.ResultSet import ResultSet
        results = self.session.processData(query)

        if not (results.rowcount > 0):
            return None

        return DBFormatter(self.logger, self.session).format(results)
Ejemplo n.º 34
0
    def select(self, query):
        """
        Connect the session, run the query and return its rows formatted by
        a DBFormatter, or None when the row count is not positive.
        """
        # db connect
        self.session.connect()

        # -> WMCore.Database.ResultSet import ResultSet
        results = self.session.processData(query)

        if not (results.rowcount > 0):
            return None

        return DBFormatter(self.logger, self.session).format(results)
Ejemplo n.º 35
0
    def testBFormatting(self):
        """
        Test various formats

        Fills the database, then runs selectSQL once per DBFormatter output
        style (list of rows, single row, list of dicts, single dict) and
        compares each output with the expected values.
        """
        # fill the database with some initial data
        self.stuffDB()

        # current_thread() is the supported spelling; currentThread() is a
        # deprecated alias of it.
        myThread = threading.current_thread()
        dbformatter = DBFormatter(myThread.logger, myThread.dbi)

        # All rows as a list of lists.
        result = myThread.dbi.processData(self.selectSQL)
        output = dbformatter.format(result)
        self.assertEqual(output,
                         [['value1a', 1, 'value2a'], ['value1b', 2, 'value2b'],
                          ['value1c', 3, 'value2d']])

        # Only the first row.
        result = myThread.dbi.processData(self.selectSQL)
        output = dbformatter.formatOne(result)
        print('test1 ' + str(output))
        self.assertEqual(output, ['value1a', 1, 'value2a'])

        # All rows as dictionaries keyed by column name.
        result = myThread.dbi.processData(self.selectSQL)
        output = dbformatter.formatDict(result)
        self.assertEqual(output, [
            {'column3': 'value2a', 'column2': 1, 'column1': 'value1a'},
            {'column3': 'value2b', 'column2': 2, 'column1': 'value1b'},
            {'column3': 'value2d', 'column2': 3, 'column1': 'value1c'},
        ])

        # Only the first row, as a dictionary.
        result = myThread.dbi.processData(self.selectSQL)
        output = dbformatter.formatOneDict(result)
        self.assertEqual(output,
                         {'column3': 'value2a', 'column2': 1, 'column1': 'value1a'})
Ejemplo n.º 36
0
def main():
    """
    _main_

    Fill in the missing TaskName of the job report for every job id passed
    with -j (several ids may be given, whitespace-separated, in one argument).

    For each id the report path and task name are read from the first row of
    the query built from getQuery. Reports that already carry a task name are
    skipped, the others are updated and saved back to the same path.

    Exits through parser.error() when no job id is given.

    :return: 0 once every job id has been processed
    """
    usage = "Usage: %prog -j jobId"
    parser = OptionParser(usage = usage)
    parser.add_option('-j', '--jobId', help = 'Wmbs jobId reported in the component log', dest = 'jobId')
    (options, args) = parser.parse_args()
    if not options.jobId:
        # parser.error() prints the usage plus the message and exits, so the
        # example has to be shown before calling it.
        print('Example: python fixJobAccountant.py -j "1678 1679"')
        parser.error('You must provide at least one jobId')
    if 'WMAGENT_CONFIG' not in os.environ:
        os.environ['WMAGENT_CONFIG'] = '/data/srv/wmagent/current/config/wmagent/config.py'
    if 'manage' not in os.environ:
        os.environ['manage'] = '/data/srv/wmagent/current/config/wmagent/manage'

    connectToDB()
    myThread = threading.current_thread()
    formatter = DBFormatter(logging, myThread.dbi)

    for job in options.jobId.split():
        # NOTE(review): the job id from the command line is appended to the
        # SQL text as-is; consider a bind variable if getQuery allows it.
        myQuery = getQuery + str(job)
        output = myThread.transaction.processData(myQuery)
        result = formatter.format(output)
        if not result:
            # Nothing known about this job id; indexing would raise otherwise.
            print("No database entry found for job id %s.\nSkipping .." % job)
            continue
        reportPath = result[0][0]
        taskName = result[0][1]

        jr = Report(reportPath)
        if jr.getTaskName():
            print("Job id %s already has a TaskName %s.\nSkipping .." % (job, jr.getTaskName()))
            continue
        jr.setTaskName(taskName)
        jr.save(reportPath)
        print("Updated TaskName for fwjr for jobId: %s" % job)

    print("Done!")
    return 0
Ejemplo n.º 37
0
def getWMBSInfo(config):
    """
    Print a status overview of the agent, built from a series of database
    queries: known/incomplete/not-injected workflows, workqueue elements per
    status, job counts per state, unfinished subscriptions, available and
    acquired files, open DBS blocks and files missing in DBS/PhEDEx.

    :param config: configuration object handed on to fetchWorkflowsSpec,
        checkLocalWQStatus and checkGlobalWQStatus
    :return: None, everything is reported through print
    """
    connectToDB()
    # current_thread() is the supported spelling; currentThread() is a
    # deprecated alias of it.
    myThread = threading.current_thread()
    formatter = DBFormatter(logging, myThread.dbi)

    workflows = formatter.formatDict(myThread.dbi.processData(knownWorkflows))
    workflows = [wf['name'] for wf in workflows]
    print("\n*** WORKFLOWS: found %d distinct workflows in this agent." % len(workflows))
    # The specs fetched here are reused by every printWfStatus call below.
    workflowsDict = fetchWorkflowsSpec(config, workflows)
    printWfStatus(workflows, workflowsDict)

    for st in ('Available', 'Negotiating', 'Acquired', 'Running'):
        print("\n*** WORKQUEUE: elements still marked as %s in LQ workqueue / workqueue_inbox." % st)
        checkLocalWQStatus(config, st)

    for st in ("Acquired", "Running"):
        print("\n*** WORKQUEUE: elements still marked as %s in GQ workqueue." % st)
        checkGlobalWQStatus(config, st)

    workflows = formatter.formatDict(myThread.dbi.processData(incompleteWfs))
    workflows = [wf['name'] for wf in workflows]
    print("\n*** WORKFLOWS: there are %d distinct workflows not completed." % len(workflows))
    printWfStatus(workflows, workflowsDict)

    wfsNotInjected = flattenList(formatter.format(myThread.dbi.processData(workflowsNotInjected)))
    print("\n*** WORKFLOWS: found %d workflows not fully injected." % len(wfsNotInjected))
    printWfStatus(wfsNotInjected, workflowsDict)

    jobsByState = formatter.formatDict(myThread.dbi.processData(jobCountByState))
    print("\n*** WMBS: amount of wmbs jobs in each status:\n%s" % jobsByState)
    # IF we have executing jobs in wmbs and nothing in condor, then investigate the wfs
    if 'executing' in [item['name'] for item in jobsByState]:
        wfsJobCount = formatter.formatDict(myThread.dbi.processData(workflowsExecuting))
        print("\n*** WMBS: %d workflows with executing jobs in wmbs:" % len(wfsJobCount))
        workflows = [wf['name'] for wf in wfsJobCount]
        printWfStatus(workflows, workflowsDict)

    unfinishedSubs = formatter.formatDict(myThread.dbi.processData(unfinishedSubscriptions))
    unfinishedSubs = [wf['wfname'] for wf in unfinishedSubs]
    print("\n*** SUBSCRIPTIONS: subscriptions not finished: %d" % len(unfinishedSubs))
    printWfStatus(unfinishedSubs, workflowsDict)

    filesAvailable = formatter.formatDict(myThread.dbi.processData(filesAvailWMBS))
    print("\n*** SUBSCRIPTIONS: found %d files available in WMBS (waiting for job creation):\n%s"
          % (len(filesAvailable), filesAvailable))

    filesAcquired = formatter.formatDict(myThread.dbi.processData(filesAcqWMBS))
    print("\n*** SUBSCRIPTIONS: found %d files acquired in WMBS (waiting for jobs to finish):\n%s"
          % (len(filesAcquired), filesAcquired))

    blocksopenDBS = formatter.formatDict(myThread.dbi.processData(blocksOpenDBS))
    print("\n*** DBS: found %d blocks open in DBS." % len(blocksopenDBS), end="")
    print(" Printing the first 20 blocks only:\n%s" % blocksopenDBS[:20])

    filesnotinDBS = flattenList(formatter.format(myThread.dbi.processData(filesNotInDBS)))
    print("\n*** DBS: found %d files not uploaded to DBS.\n" % len(filesnotinDBS))
    getDsetAndWf(filesnotinDBS, workflowsDict)

    filesnotinPhedex = flattenList(formatter.format(myThread.dbi.processData(filesNotInPhedex)))
    print("\n*** PHEDEX: found %d files not injected in PhEDEx, with valid block id (recoverable)." % len(filesnotinPhedex))
    getDsetAndWf(filesnotinPhedex, workflowsDict)

    # NOTE(review): this message also says "valid block id" although the query
    # is named filesNotInPhedexNull - confirm the intended wording.
    filesnotinPhedexNull = flattenList(formatter.format(myThread.dbi.processData(filesNotInPhedexNull)))
    print("\n*** PHEDEX: found %d files not injected in PhEDEx, with valid block id (unrecoverable)." % len(filesnotinPhedexNull))
    getDsetAndWf(filesnotinPhedexNull, workflowsDict)
 def format(self, result):
     """
     Return the first column of the first formatted row, or False when
     the formatted result is empty.
     """
     rows = DBFormatter.format(self, result)
     return rows[0][0] if rows else False
Ejemplo n.º 39
0
def getWMBSInfo(config):
    """
    Print a status overview of the agent, built from a series of database
    queries: known/incomplete/not-injected workflows, workqueue elements per
    status, job counts per state, unfinished subscriptions, available and
    acquired files, open DBS blocks and files missing in DBS/PhEDEx.

    :param config: configuration object handed on to fetchWorkflowsSpec,
        checkLocalWQStatus and checkGlobalWQStatus
    :return: None, everything is reported through print
    """
    connectToDB()
    # current_thread() is the supported spelling; currentThread() is a
    # deprecated alias of it.
    myThread = threading.current_thread()
    formatter = DBFormatter(logging, myThread.dbi)

    workflows = formatter.formatDict(myThread.dbi.processData(knownWorkflows))
    workflows = [wf['name'] for wf in workflows]
    print("\n*** WORKFLOWS: found %d distinct workflows in this agent." %
          len(workflows))
    # The specs fetched here are reused by every printWfStatus call below.
    workflowsDict = fetchWorkflowsSpec(config, workflows)
    printWfStatus(workflows, workflowsDict)

    for st in ('Available', 'Negotiating', 'Acquired', 'Running'):
        print(
            "\n*** WORKQUEUE: elements still marked as %s in LQ workqueue / workqueue_inbox."
            % st)
        checkLocalWQStatus(config, st)

    for st in ("Acquired", "Running"):
        print("\n*** WORKQUEUE: elements still marked as %s in GQ workqueue." %
              st)
        checkGlobalWQStatus(config, st)

    workflows = formatter.formatDict(myThread.dbi.processData(incompleteWfs))
    workflows = [wf['name'] for wf in workflows]
    print("\n*** WORKFLOWS: there are %d distinct workflows not completed." %
          len(workflows))
    printWfStatus(workflows, workflowsDict)

    wfsNotInjected = flattenList(
        formatter.format(myThread.dbi.processData(workflowsNotInjected)))
    print("\n*** WORKFLOWS: found %d workflows not fully injected." %
          len(wfsNotInjected))
    printWfStatus(wfsNotInjected, workflowsDict)

    jobsByState = formatter.formatDict(
        myThread.dbi.processData(jobCountByState))
    print("\n*** WMBS: amount of wmbs jobs in each status:\n%s" % jobsByState)
    # IF we have executing jobs in wmbs and nothing in condor, then investigate the wfs
    if 'executing' in [item['name'] for item in jobsByState]:
        wfsJobCount = formatter.formatDict(
            myThread.dbi.processData(workflowsExecuting))
        print("\n*** WMBS: %d workflows with executing jobs in wmbs:" %
              len(wfsJobCount))
        workflows = [wf['name'] for wf in wfsJobCount]
        printWfStatus(workflows, workflowsDict)

    unfinishedSubs = formatter.formatDict(
        myThread.dbi.processData(unfinishedSubscriptions))
    unfinishedSubs = [wf['wfname'] for wf in unfinishedSubs]
    print("\n*** SUBSCRIPTIONS: subscriptions not finished: %d" %
          len(unfinishedSubs))
    printWfStatus(unfinishedSubs, workflowsDict)

    filesAvailable = formatter.formatDict(
        myThread.dbi.processData(filesAvailWMBS))
    print(
        "\n*** SUBSCRIPTIONS: found %d files available in WMBS (waiting for job creation):\n%s"
        % (len(filesAvailable), filesAvailable))

    filesAcquired = formatter.formatDict(
        myThread.dbi.processData(filesAcqWMBS))
    print(
        "\n*** SUBSCRIPTIONS: found %d files acquired in WMBS (waiting for jobs to finish):\n%s"
        % (len(filesAcquired), filesAcquired))

    blocksopenDBS = formatter.formatDict(
        myThread.dbi.processData(blocksOpenDBS))
    print("\n*** DBS: found %d blocks open in DBS." % len(blocksopenDBS),
          end="")
    print(" Printing the first 20 blocks only:\n%s" % blocksopenDBS[:20])

    filesnotinDBS = flattenList(
        formatter.format(myThread.dbi.processData(filesNotInDBS)))
    print("\n*** DBS: found %d files not uploaded to DBS.\n" %
          len(filesnotinDBS))
    getDsetAndWf(filesnotinDBS, workflowsDict)

    filesnotinPhedex = flattenList(
        formatter.format(myThread.dbi.processData(filesNotInPhedex)))
    print(
        "\n*** PHEDEX: found %d files not injected in PhEDEx, with valid block id (recoverable)."
        % len(filesnotinPhedex))
    getDsetAndWf(filesnotinPhedex, workflowsDict)

    # NOTE(review): this message also says "valid block id" although the query
    # is named filesNotInPhedexNull - confirm the intended wording.
    filesnotinPhedexNull = flattenList(
        formatter.format(myThread.dbi.processData(filesNotInPhedexNull)))
    print(
        "\n*** PHEDEX: found %d files not injected in PhEDEx, with valid block id (unrecoverable)."
        % len(filesnotinPhedexNull))
    getDsetAndWf(filesnotinPhedexNull, workflowsDict)
Ejemplo n.º 40
0
 def format(self, result):
     """
     Return the first column of the first formatted row.
     """
     rows = DBFormatter.format(self, result)
     firstRow = rows[0]
     return firstRow[0]
Ejemplo n.º 41
0
 def format(self, result):
     """
     Return the first column of the first formatted row, or False when
     the formatted result is empty.
     """
     rows = DBFormatter.format(self, result)
     return rows[0][0] if rows else False
def main():
    """
    _main_

    Look for output files (LFNs) that are reported by more than one job
    report pickle, or that are already present in wmbs_file_details.

    Steps, in order:
      1. read the last 1000 lines of the JobAccountant ComponentLog and pick
         the pickle paths mentioned there;
      2. load every existing pickle and map each output LFN to the pickles
         reporting it;
      3. report LFNs found in more than one pickle, dump the details to
         dupPickles.json and, on interactive confirmation, delete all but the
         first pickle for each of those LFNs;
      4. load every LFN from wmbs_file_details and print an overview of the
         reports whose LFNs are already in the database.

    The function always ends through sys.exit(0).

    NOTE(review): dict.iteritems() and raw_input() below exist in Python 2
    only.
    """
    if 'WMAGENT_CONFIG' not in os.environ:
        os.environ['WMAGENT_CONFIG'] = '/data/srv/wmagent/current/config/wmagent/config.py'
    if 'manage' not in os.environ:
        os.environ['manage'] = '/data/srv/wmagent/current/config/wmagent/manage'

    ### Fetch the report pickle files from the component log
    command = ["tail", "-n1000", "install/wmagent/JobAccountant/ComponentLog"]
    p = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    out, err = p.communicate()
    # Keep only the log lines mentioning the job cache directory ...
    logFiles = [line for line in out.splitlines() if 'install/wmagent/JobCreator/JobCache' in line]
    # ... and take their third whitespace-separated field as the pickle path.
    logFiles = [i.split()[2] for i in logFiles]
    msg = "Found %d pickle files to parse " % len(logFiles)

    ### Now unpickle each of these files and get their output files
    # also check whether any of them are duplicate
    lfn2PklDict = {}
    dupOutputPkl = {}  # string value with the dup LFN and keyed by the pickle file path
    # A single Report object is reused for every load() call below.
    jobReport = Report()
    for pklPath in logFiles:
        if not os.path.exists(pklPath):
            continue

        jobReport.load(pklPath)
        for e in jobReport.getAllFiles():
            lfn2PklDict.setdefault(e['lfn'], [])
            lfn2PklDict[e['lfn']].append(pklPath)

    # now check which files contain more than one pickle path (= created by diff jobs)
    dupFiles = []
    for lfn, pkls in lfn2PklDict.iteritems():
        if len(pkls) > 1:
            dupFiles.append(lfn)
            for pkl in pkls:
                # Serialize each pickle only once, then collect all of its
                # duplicated LFNs under 'dup_lfns'.
                if pkl not in dupOutputPkl:
                    jobReport.load(pkl)
                    dupOutputPkl[pkl] = jobReport.__to_json__(None)
                    dupOutputPkl[pkl]['dup_lfns'] = []
                dupOutputPkl[pkl]['dup_lfns'].append(lfn)

    msg += "with a total of %d output files and %d duplicated" % (len(lfn2PklDict), len(dupFiles))
    msg += " files to process among them."
    msg += "\nDuplicate files are:\n%s" % dupFiles
    print(msg)

    if dupFiles:
        print("See dupPickles.json for further details ...")
        with open('dupPickles.json', 'w') as fo:
            json.dump(dupOutputPkl, fo, indent=2)

    if dupFiles:
        # Deletion only happens after an explicit "Y" from the operator.
        var = raw_input("Can we automatically delete those pickle files? Y/N\n")
        if var == "Y":
            # then delete all job report files but the first one - NOT ideal
            for fname in dupFiles:
                for pklFile in lfn2PklDict[fname][1:]:
                    if os.path.isfile(pklFile):
                        print("Deleting %s ..." % pklFile)
                        os.remove(pklFile)
                    else:
                        print("    File has probably been already deleted %s ..." % pklFile)
            print("  Done!")

    ### Time to load all - this is BAD - LFNs from WMBS database
    print("\nNow loading all LFNs from wmbs_file_details ...")
    connectToDB()
    myThread = threading.currentThread()
    formatter = DBFormatter(logging, myThread.dbi)
    output = myThread.transaction.processData("SELECT lfn FROM wmbs_file_details")
    lfnsDB = formatter.format(output)
    # Each row has a single column, the LFN.
    lfnsDB = [item[0] for item in lfnsDB]
    print("Retrieved %d lfns from wmbs_file_details" % len(lfnsDB))

    ### Compare what are the duplicates
    # From here on dupFiles means: LFNs seen in the pickles AND in the database.
    dupFiles = list(set(lfn2PklDict.keys()) & set(lfnsDB))
    print("\nFound %d duplicate files." % len(dupFiles))
    if len(dupFiles) == 0:
        sys.exit(0)

    ### Print some basic data about these reports
    print("Their overview is: ")
    dbDupPkl = []
    for fname in dupFiles:
        for pklPath in lfn2PklDict[fname]:
            jobInfo = {'lfn': fname}
            jobInfo['pklPath'] = pklPath

            jobReport.load(pklPath)
            jobInfo['exitCode'] = jobReport.getExitCode()
            jobInfo['taskSuccess'] = jobReport.taskSuccessful()
            jobInfo['EOSLogURL'] = jobReport.getLogURL()
            jobInfo['HostName'] = jobReport.getWorkerNodeInfo()['HostName']
            jobInfo['Site'] = jobReport.getSiteName()
            jobInfo['task'] = jobReport.getTaskName()

            dbDupPkl.append(jobInfo)

    print(pformat(dbDupPkl))
    print("")

    print("Remove them, restart the component and be happy!\n")
    sys.exit(0)
Ejemplo n.º 43
0
 def format(self, result):
     """
     Return the first column of the first formatted row.
     """
     rows = DBFormatter.format(self, result)
     firstRow = rows[0]
     return firstRow[0]
Ejemplo n.º 44
0
class MySQLBase(object):
    """
    Base class for simple SQL actions: execute() runs self.sql through the
    database interface and formats the result with a DBFormatter.

    The ``sql`` attribute read by execute() is not set here and has to be
    provided elsewhere (e.g. by a subclass); getBinds() may be overridden to
    supply bind variables.
    """

    def __init__(self, logger, dbinterface):
        """
        Keep the logger and database interface and build the DBFormatter
        used by format() and formatOne().
        """
        self.logger = logger
        self.dbi = dbinterface
        self.wmformatter = DBFormatter(self.logger, self.dbi)

    def truefalse(self, value):
        """
        Convert value to a bool. The strings 'False', 'FALSE', 'n', 'NO' and
        'No' count as False; everything else follows normal truthiness, so
        other spellings such as 'false' are True.
        """
        if value in ('False', 'FALSE', 'n', 'NO', 'No'):
            return False
        return bool(value)

    def convertdatetime(self, t):
        """
        Convert a datetime into integer seconds, via time.mktime() on its
        time tuple.
        """
        return int(time.mktime(t.timetuple()))

    def timestamp(self):
        """
        generate a timestamp
        """
        t = datetime.datetime.now()
        return self.convertdatetime(t)

    def format(self, result, dictionary = False):
        """
        Some standard formatting

        Delegates to the DBFormatter: formatDict() when dictionary is true,
        format() otherwise.
        """
        if dictionary:
            return self.wmformatter.formatDict(result)
        return self.wmformatter.format(result)

    def formatOne(self, result, dictionary = False):
        """
        single value format

        Delegates to the DBFormatter: formatOneDict() when dictionary is
        true, formatOne() otherwise.
        """
        if dictionary:
            return self.wmformatter.formatOneDict(result)
        return self.wmformatter.formatOne(result)

    def getBinds(self):
        """
        Return the appropriately formatted binds for the sql
        """
        return {}

    def execute(self, conn = None, transaction = False):
        """
        A simple select with no binds/arguments is the default
        """
        result = self.dbi.processData(self.sql, self.getBinds(),
                         conn = conn, transaction = transaction)
        return self.format(result)

    def restoreCaps(self, dicts, *newkeys):
        """
        Utility function to restore capitalization in dictionary keys
         lost to conform with Oracle - use of this should be phased out

        Every dictionary in dicts is modified in place: for each name in
        newkeys the value stored under the lower-cased name is moved to the
        name as given. A missing lower-cased key raises KeyError.
        """
        for row in dicts:
            for key in newkeys:
                row[key] = row.pop(key.lower())
        return dicts