Exemple #1
0
    def test_02_job(self):
        """Persist an operation carrying one FTS3Job and read the job back.

        A job is attached to a freshly generated operation, the operation is
        persisted through the client and fetched again. The reloaded operation
        must hold exactly one job, linked to the operation, whose identifying
        attributes match the ones that were stored.
        """
        op = self.generateOperation("Transfer",
                                    3, ["Target1", "Target2"],
                                    sources=["Source1", "Source2"])

        job1 = FTS3Job()
        job1.ftsGUID = "a-random-guid"
        job1.ftsServer = "fts3"

        job1.username = op.username
        job1.userGroup = op.userGroup

        op.ftsJobs.append(job1)

        res = self.client.persistOperation(op)
        self.assertTrue(res["OK"], res)
        opID = res["Value"]

        res = self.client.getOperation(opID)
        self.assertTrue(res["OK"], res)

        op2 = res["Value"]
        # assertEqual reports both operands on failure, unlike assertTrue(a == b)
        self.assertEqual(len(op2.ftsJobs), 1)
        job2 = op2.ftsJobs[0]
        self.assertEqual(job2.operationID, opID)

        # The attribute name is passed as message to tell which one differs
        for attr in ["ftsGUID", "ftsServer", "username", "userGroup"]:
            self.assertEqual(getattr(job1, attr), getattr(job2, attr), attr)
Exemple #2
0
    def _createNewJob(self, jobType, ftsFiles, targetSE, sourceSE=None):
        """Build an FTS3Job pre-filled from this operation.

        The job gets the given type, SEs and files, and takes over the
        activity, priority, username, userGroup, vo, operationID and rmsReqID
        of the operation it is created from.

        :param jobType: type of job to create (Transfer, Staging, Removal)
        :param ftsFiles: list of FTS3File objects the job has to work on
        :param targetSE: SE on which to operate
        :param sourceSE: source SE, only useful for Transfer jobs

        :return: FTS3Job object
        """

        job = FTS3Job()

        # What is specific to this very job
        job.type = jobType
        job.sourceSE = sourceSE
        job.targetSE = targetSE

        # What is copied as is from the operation
        for attrName in ("activity", "priority", "username", "userGroup", "vo"):
            setattr(job, attrName, getattr(self, attrName))

        job.filesToSubmit = ftsFiles

        # Identifiers linking the job back to the operation
        for attrName in ("operationID", "rmsReqID"):
            setattr(job, attrName, getattr(self, attrName))

        return job
Exemple #3
0
  def test_02_job(self):
    """Persist an operation carrying one FTS3Job and read the job back.

    A job is attached to a freshly generated operation, the operation is
    persisted through the client and fetched again. The reloaded operation
    must hold exactly one job, linked to the operation, whose identifying
    attributes match the ones that were stored.
    """
    op = self.generateOperation('Transfer', 3, ['Target1', 'Target2'], sources=['Source1', 'Source2'])

    job1 = FTS3Job()
    job1.ftsGUID = 'a-random-guid'
    job1.ftsServer = 'fts3'

    job1.username = op.username
    job1.userGroup = op.userGroup

    op.ftsJobs.append(job1)

    res = self.client.persistOperation(op)
    self.assertTrue(res['OK'], res)
    opID = res['Value']

    res = self.client.getOperation(opID)
    self.assertTrue(res['OK'], res)

    op2 = res['Value']
    # assertEqual reports both operands on failure, unlike assertTrue(a == b)
    self.assertEqual(len(op2.ftsJobs), 1)
    job2 = op2.ftsJobs[0]
    self.assertEqual(job2.operationID, opID)

    # The attribute name is passed as message to tell which one differs
    for attr in ['ftsGUID', 'ftsServer', 'username', 'userGroup']:
      self.assertEqual(getattr(job1, attr), getattr(job2, attr), attr)
Exemple #4
0
def generateFTS3Job(sourceSE, targetSE, lfns, multiHopSE=None):
    """Build a "Transfer" FTS3Job holding one FTS3File per LFN.

    The file IDs follow the order of ``lfns``, starting with 1, and each
    file carries its LFN as checksum too. The job always gets
    operationID 123 and rmsReqID 456.

    :param str sourceSE: source SE Name
    :param str targetSE: target SE Name
    :param list lfns: list of lfns (str)
    :param str multiHopSE: hop SE Name

    :return: the FTS3Job object, with the files in ``filesToSubmit``
    """

    def _fileFor(fileID, lfn):
        # One FTS3File per LFN; the LFN doubles as the checksum value
        ftsFile = FTS3File()
        ftsFile.fileID = fileID
        ftsFile.checksum = lfn
        ftsFile.lfn = lfn
        return ftsFile

    job = FTS3Job()
    job.type = "Transfer"
    job.sourceSE = sourceSE
    job.targetSE = targetSE
    job.multiHopSE = multiHopSE

    job.filesToSubmit = [_fileFor(fileID, lfn) for fileID, lfn in enumerate(lfns, start=1)]
    job.operationID = 123
    job.rmsReqID = 456

    return job
Exemple #5
0
def test_raceCondition(fts3db):
    """Reproduce the race condition seen with several agents in parallel.

    Some non finished operations used to be picked up for further
    processing while jobs belonging to them were being monitored.
    Every combination of assigned / non assigned operation and job
    is created here:

    ==== ========== ====== ============ ==========================================
    OpID OpAssigned JobIDs JobsAssigned Comment
    ==== ========== ====== ============ ==========================================
    1    no         -      -            No job
    2    yes        -      -            No job
    3    no         1      none         Nothing is assigned
    4    no         2      2            Job is assigned, so can't use the operation
    5    yes        3      none         Op is assigned, so can't use it
    6    yes        4      4            That would be a problematic situation !!
    7    no         5, 6   5            Job 5 is assigned, so Op 7 cannot be used,
                                        even if Job 6 is unassigned (this was the bug)
    8    yes        7, 8   7            Op 8 is assigned, so can't be used
                                        (and is problematic like Op 6)
    ==== ========== ====== ============ ==========================================

    Under these circumstances, we want:

    * getNonFinishedOperations to return operations 1 and 3
    * getActiveJobs to return jobs 1 and 6
    """

    def _buildOperation(operationID, jobIDs=(), nbFiles=1):
        # All operations must have at least one file associated
        # for the queries to make sense
        operation = FTS3TransferOperation()
        operation.operationID = operationID
        for _ in range(nbFiles):
            ftsFile = FTS3File()
            ftsFile.targetSE = "targetSE"
            operation.ftsFiles.append(ftsFile)
        for jobID in jobIDs:
            job = FTS3Job()
            job.jobID = jobID
            operation.ftsJobs.append(job)
        return operation

    # The assignment flags are set further down, directly in the DB
    operations = [
        _buildOperation(1),
        _buildOperation(2),
        _buildOperation(3, jobIDs=[1]),
        _buildOperation(4, jobIDs=[2]),
        _buildOperation(5, jobIDs=[3]),
        _buildOperation(6, jobIDs=[4]),
        # Operation 7 is the only one holding two files
        _buildOperation(7, jobIDs=[5, 6], nbFiles=2),
        _buildOperation(8, jobIDs=[7, 8]),
    ]
    for operation in operations:
        persisted = fts3db.persistOperation(operation)
        assert persisted["OK"]

    # Flag the jobs, then the operations, that are supposed to be assigned
    assignJobs = (
        update(FTS3DB.fts3JobTable)
        .values(assignment="Yes")
        .where(FTS3DB.fts3JobTable.c.jobID.in_([2, 4, 5, 7]))
    )
    assignOperations = (
        update(FTS3DB.fts3OperationTable)
        .values(assignment="Yes")
        .where(FTS3DB.fts3OperationTable.c.operationID.in_([2, 5, 6, 8]))
    )

    with fts3db.engine.begin() as conn:
        conn.execute(assignJobs)

    with fts3db.engine.begin() as conn:
        conn.execute(assignOperations)

    opsResult = fts3db.getNonFinishedOperations(operationAssignmentTag=None)
    assert opsResult["OK"]
    assert [operation.operationID for operation in opsResult["Value"]] == [1, 3]

    jobsResult = fts3db.getActiveJobs(jobAssignmentTag=None)
    assert jobsResult["OK"]
    assert [job.jobID for job in jobsResult["Value"]] == [1, 6]
Exemple #6
0
    def test_05_cancelNotFoundJob(self):
        """When a job disappears from the server, we need to cancel it
        and its files.

        The scenario is as follows. Operation has 4 files.
        Job1 is submitted for File1 and File2.
        Job2 is submitted for File3 and File4.
        File1 is finished, and then the job disappears.
        We need to cancel Job1 and File2.
        Job2, File3 and File4 are here to make sure we do not wrongly cancel other files
        """

        op = self.generateOperation("Transfer", 4, ["Target1"])

        job1 = FTS3Job()
        job1GUID = "05-cancelall-job1"
        job1.ftsGUID = job1GUID
        job1.ftsServer = "fts3"

        job1.username = op.username
        job1.userGroup = op.userGroup

        # assign the GUID to the files
        op.ftsFiles[0].ftsGUID = job1GUID
        op.ftsFiles[1].ftsGUID = job1GUID

        op.ftsJobs.append(job1)

        job2 = FTS3Job()
        job2GUID = "05-cancelall-job2"
        job2.ftsGUID = job2GUID
        job2.ftsServer = "fts3"

        job2.username = op.username
        job2.userGroup = op.userGroup

        # assign the GUID to the files
        op.ftsFiles[2].ftsGUID = job2GUID
        op.ftsFiles[3].ftsGUID = job2GUID

        op.ftsJobs.append(job2)

        # Check the result before unwrapping it, so that a DB failure is
        # reported as such and not as a KeyError on "Value"
        res = self.db.persistOperation(op)
        self.assertTrue(res["OK"], res)
        opID = res["Value"]

        # Get back the operation to update all the IDs
        res = self.db.getOperation(opID)
        self.assertTrue(res["OK"], res)
        op = res["Value"]

        # Now we monitor Job1, and find that the first file is finished, the second is still ongoing
        # And since File1 is in an FTS final status, we set its ftsGUID to None
        file1ID = op.ftsFiles[0].fileID
        file2ID = op.ftsFiles[1].fileID
        fileStatusDict = {
            file1ID: {
                "status": "Finished",
                "ftsGUID": None
            },
            file2ID: {
                "status": "Staging"
            }
        }

        # And when updating, take care of specifying that you are updating for a given GUID
        res = self.db.updateFileStatus(fileStatusDict, ftsGUID=job1GUID)
        self.assertTrue(res["OK"], res)

        # Now we monitor job1 again, and find out that it has disappeared
        # So we cancel the job and the files
        res = self.db.cancelNonExistingJob(opID, job1GUID)
        self.assertTrue(res["OK"], res)

        # And hopefully now File2 is Canceled, while the others are as they were
        res = self.client.getOperation(opID)
        self.assertTrue(res["OK"], res)
        op = res["Value"]

        self.assertEqual(op.ftsFiles[0].status, "Finished")
        self.assertEqual(op.ftsFiles[1].status, "Canceled")
        self.assertIsNone(op.ftsFiles[1].ftsGUID)
        self.assertEqual(op.ftsFiles[2].status, "New")
        self.assertEqual(op.ftsFiles[3].status, "New")
Exemple #7
0
    def test_04_job_monitoring_solve_racecondition(self):
        """We used to have a race condition resulting in duplicated transfers for a file.
        This test reproduces the race condition to make sure it is fixed.
        This test makes sure that the update only happens on files concerned by the job

        The scenario is as follows. Operation has two files File1 and File2.
        Job1 is submitted for File1 and File2.
        File1 fails, File2 is still ongoing.
        We submit Job2 for File1.
        Job1 is monitored again, and we update again File1 to failed (because it is so in Job1)
        A Job3 would be created for File1, despite Job2 still running on it.
        """
        op = self.generateOperation("Transfer", 2, ["Target1"])

        job1 = FTS3Job()
        job1GUID = "04-racecondition-job1"
        job1.ftsGUID = job1GUID
        job1.ftsServer = "fts3"

        job1.username = op.username
        job1.userGroup = op.userGroup

        op.ftsJobs.append(job1)

        # Now, when submitting the job, we specify the ftsGUID to which files are
        # assigned
        for ftsFile in op.ftsFiles:
            ftsFile.ftsGUID = job1GUID

        # Check the result before unwrapping it, so that a failure is
        # reported as such and not as a KeyError on "Value"
        res = self.client.persistOperation(op)
        self.assertTrue(res["OK"], res)
        opID = res["Value"]

        # Get back the operation to update all the IDs
        res = self.client.getOperation(opID)
        self.assertTrue(res["OK"], res)
        op = res["Value"]

        fileIds = [ftsFile.fileID for ftsFile in op.ftsFiles]

        # Arbitrarily decide that File1 has the smallest fileID
        file1ID = min(fileIds)
        file2ID = max(fileIds)

        # Now we monitor Job1, and find that the first file has failed, the second is still ongoing
        # And since File1 is in an FTS final status, we set its ftsGUID to None
        fileStatusDict = {
            file1ID: {
                "status": "Failed",
                "error": "Someone made a boo-boo",
                "ftsGUID": None
            },
            file2ID: {
                "status": "Staging"
            },
        }

        # And when updating, take care of specifying that you are updating for a given GUID
        res = self.db.updateFileStatus(fileStatusDict, ftsGUID=job1GUID)
        self.assertTrue(res["OK"], res)

        # We would then submit a second job
        job2 = FTS3Job()
        job2GUID = "04-racecondition-job2"
        job2.ftsGUID = job2GUID
        job2.ftsServer = "fts3"

        job2.username = op.username
        job2.userGroup = op.userGroup

        op.ftsJobs.append(job2)

        # And do not forget to assign the new ftsGUID to File1
        for ftsFile in op.ftsFiles:
            if ftsFile.fileID == file1ID:
                ftsFile.ftsGUID = job2GUID

        # The rest of the scenario is meaningless if Job2 was not stored
        res = self.client.persistOperation(op)
        self.assertTrue(res["OK"], res)

        # Now we monitor Job2 & Job1 (in this order)
        fileStatusDictJob2 = {
            file1ID: {
                "status": "Staging"
            },
        }

        # Again specify the GUID
        res = self.db.updateFileStatus(fileStatusDictJob2, ftsGUID=job2GUID)
        self.assertTrue(res["OK"], res)

        # And in Job1, File1 is (and will remain) failed, while File2 is still ongoing
        fileStatusDictJob1 = {
            file1ID: {
                "status": "Failed",
                "error": "Someone made a boo-boo"
            },
            file2ID: {
                "status": "Staging"
            },
        }

        # And thanks to specifying the job GUID, File1 should not be touched !
        res = self.db.updateFileStatus(fileStatusDictJob1, ftsGUID=job1GUID)
        self.assertTrue(res["OK"], res)

        # And hopefully now there shouldn't be any file to submit
        res = self.client.getOperation(opID)
        self.assertTrue(res["OK"], res)
        op = res["Value"]

        # The return value of _getFilesToSubmit is compared as is,
        # it is not unwrapped from an S_OK structure
        filesToSubmit = op._getFilesToSubmit()
        self.assertEqual(filesToSubmit, [])
Exemple #8
0
    def test_03_job_monitoring_racecondition(self):
        """We used to have a race condition resulting in duplicated transfers for a file.
        This test reproduces the race condition.

        The scenario is as follows. Operation has two files File1 and File2.
        Job1 is submitted for File1 and File2.
        File1 fails, File2 is still ongoing.
        We submit Job2 for File1.
        Job1 is monitored again, and we update again File1 to failed (because it is so in Job1)
        A Job3 would be created for File1, despite Job2 still running on it.

        The file statuses are updated here without giving any ftsGUID, and the
        final assertion expects File1 to be listed again as a file to submit.
        """
        op = self.generateOperation("Transfer", 2, ["Target1"])

        job1 = FTS3Job()
        job1.ftsGUID = "03-racecondition-job1"
        job1.ftsServer = "fts3"

        job1.username = op.username
        job1.userGroup = op.userGroup

        op.ftsJobs.append(job1)

        # Check the result before unwrapping it, so that a failure is
        # reported as such and not as a KeyError on "Value"
        res = self.client.persistOperation(op)
        self.assertTrue(res["OK"], res)
        opID = res["Value"]

        # Get back the operation to update all the IDs
        res = self.client.getOperation(opID)
        self.assertTrue(res["OK"], res)
        op = res["Value"]

        fileIds = [ftsFile.fileID for ftsFile in op.ftsFiles]

        # Arbitrarily decide that File1 has the smallest fileID
        file1ID = min(fileIds)
        file2ID = max(fileIds)

        # Now we monitor Job1, and find that the first file has failed, the second is still ongoing
        fileStatusDict = {
            file1ID: {
                "status": "Failed",
                "error": "Someone made a boo-boo"
            },
            file2ID: {
                "status": "Staging"
            },
        }

        res = self.db.updateFileStatus(fileStatusDict)
        self.assertTrue(res["OK"], res)

        # We would then submit a second job
        job2 = FTS3Job()
        job2.ftsGUID = "03-racecondition-job2"
        job2.ftsServer = "fts3"

        job2.username = op.username
        job2.userGroup = op.userGroup

        op.ftsJobs.append(job2)

        # The rest of the scenario is meaningless if Job2 was not stored
        res = self.client.persistOperation(op)
        self.assertTrue(res["OK"], res)

        # Now we monitor Job2 & Job1 (in this order)
        fileStatusDictJob2 = {
            file1ID: {
                "status": "Staging"
            },
        }
        res = self.db.updateFileStatus(fileStatusDictJob2)
        self.assertTrue(res["OK"], res)

        # And in Job1, File1 is (and will remain) failed, while File2 is still ongoing
        fileStatusDictJob1 = {
            file1ID: {
                "status": "Failed",
                "error": "Someone made a boo-boo"
            },
            file2ID: {
                "status": "Staging"
            },
        }
        res = self.db.updateFileStatus(fileStatusDictJob1)
        self.assertTrue(res["OK"], res)

        # And now this is the problem, because if we check whether this operation still has
        # files to submit, it will tell us yes, while all the files are being taken care of
        res = self.client.getOperation(opID)
        self.assertTrue(res["OK"], res)
        op = res["Value"]

        # The return value of _getFilesToSubmit is compared as is,
        # it is not unwrapped from an S_OK structure
        filesToSubmit = op._getFilesToSubmit()
        self.assertEqual(filesToSubmit, [op.ftsFiles[0]])