Ejemplo n.º 1
0
 def setUp(self):
     """
     Prepare a file-backed job store and a matching config for each test.

     After this runs, ``self.jobStore`` is an initialized FileJobStore located at
     the path returned by ``self._getTestJobStorePath()``, ``self.config`` is the
     Config it was initialized with, and ``self.jobNumber`` is reset to 0.
     """
     super(WorkerTests, self).setUp()
     storePath = self._getTestJobStorePath()
     store = FileJobStore(storePath)
     config = Config()
     # The config refers to the same location using the "file:" locator prefix.
     config.jobStore = 'file:%s' % storePath
     store.initialize(config)
     self.jobStore, self.config, self.jobNumber = store, config, 0
Ejemplo n.º 2
0
    def getJobStore(cls, locator):
        """
        Instantiate the concrete job store class selected by a locator.

        The locator is split by ``cls.parseLocator`` into an implementation name and
        an implementation-specific remainder. Each implementation module is imported
        only when its branch is taken.

        :param str locator: The location of the job store to be represented by the instance

        :return: an instance of a concrete subclass of AbstractJobStore
        :rtype: toil.jobStores.abstractJobStore.AbstractJobStore

        :raise RuntimeError: if the implementation name is not one of
               'file', 'aws', 'azure' or 'google'
        """
        scheme, remainder = cls.parseLocator(locator)

        if scheme == 'file':
            from toil.jobStores.fileJobStore import FileJobStore
            return FileJobStore(remainder)

        if scheme == 'aws':
            from toil.jobStores.aws.jobStore import AWSJobStore
            return AWSJobStore(remainder)

        if scheme == 'azure':
            from toil.jobStores.azureJobStore import AzureJobStore
            return AzureJobStore(remainder)

        if scheme == 'google':
            from toil.jobStores.googleJobStore import GoogleJobStore
            # The remainder is split once on ':' into project ID and name prefix;
            # note the constructor takes them in the opposite order.
            googleParts = remainder.split(':', 1)
            projectID, namePrefix = googleParts
            return GoogleJobStore(namePrefix, projectID)

        raise RuntimeError("Unknown job store implementation '%s'" % scheme)
Ejemplo n.º 3
0
def loadJobStore(jobStoreString, config=None):
    """
    Loads a jobStore.

    :param jobStoreString: either a path starting with '.' or '/' (treated as a
           file job store), or a string of the form ``<name>:<args>`` where
           ``<name>`` is one of 'file', 'aws' or 'azure'
    :param config: passed through to the selected job store; see
           AbstractJobStore.__init__
    :return: an instance of a concrete subclass of AbstractJobStore
    :rtype: jobStores.abstractJobStore.AbstractJobStore
    :raise RuntimeError: if the string is empty, contains no colon, or names an
           unknown job store implementation
    """
    # startswith() is safe on an empty string, unlike indexing with [0]; an
    # empty string then falls through to the descriptive RuntimeError below.
    if jobStoreString.startswith(('/', '.')):
        jobStoreString = 'file:' + jobStoreString

    try:
        jobStoreName, jobStoreArgs = jobStoreString.split(':', 1)
    except ValueError:
        raise RuntimeError(
            'Job store string must either be a path starting in . or / or a contain at least one '
            'colon separating the name of the job store implementation from an initialization '
            'string specific to that job store. If a path starting in . or / is passed, the file '
            'job store will be used for backwards compatibility.')

    # Implementation modules are imported only when their branch is selected.
    if jobStoreName == 'file':
        from toil.jobStores.fileJobStore import FileJobStore
        return FileJobStore(jobStoreArgs, config=config)
    elif jobStoreName == 'aws':
        from toil.jobStores.aws.jobStore import AWSJobStore
        return AWSJobStore.createJobStore(jobStoreArgs, config=config)
    elif jobStoreName == 'azure':
        from toil.jobStores.azureJobStore import AzureJobStore
        # Azure arguments have the form "<account>:<namePrefix>".
        account, namePrefix = jobStoreArgs.split(':', 1)
        return AzureJobStore(account, namePrefix, config=config)
    else:
        raise RuntimeError("Unknown job store implementation '%s'" %
                           jobStoreName)
Ejemplo n.º 4
0
    def loadOrCreateJobStore(jobStoreString, config=None):
        """
        Loads an existing jobStore if it already exists. Otherwise a new instance of a jobStore is
        created and returned.

        :param str jobStoreString: either a path starting with '.' or '/' (treated as a
               file job store), or a string of the form ``<name>:<args>`` where ``<name>``
               is one of 'file', 'aws', 'azure' or 'google'
        :param toil.common.Config config: passed through to the selected job store; see
               AbstractJobStore.__init__
        :return: an instance of a concrete subclass of AbstractJobStore
        :rtype: toil.jobStores.abstractJobStore.AbstractJobStore
        :raise RuntimeError: if the string is empty, contains no colon, or names an
               unknown job store implementation
        """
        # startswith() is safe on an empty string, unlike indexing with [0]; an
        # empty string then falls through to the descriptive RuntimeError below.
        if jobStoreString.startswith(('/', '.')):
            jobStoreString = 'file:' + jobStoreString

        try:
            jobStoreName, jobStoreArgs = jobStoreString.split(':', 1)
        except ValueError:
            raise RuntimeError(
                'A job store string must either be a path starting in . or / or a contain at '
                'least one colon separating the name of the job store implementation from an '
                'initialization string specific to that job store. If a path starting in . or / '
                'is passed, the file job store will be used for backwards compatibility.'
            )

        # Implementation modules are imported only when their branch is selected.
        if jobStoreName == 'file':
            from toil.jobStores.fileJobStore import FileJobStore
            return FileJobStore(jobStoreArgs, config=config)

        elif jobStoreName == 'aws':
            from toil.jobStores.aws.jobStore import AWSJobStore
            return AWSJobStore.loadOrCreateJobStore(jobStoreArgs,
                                                    config=config)

        elif jobStoreName == 'azure':
            from toil.jobStores.azureJobStore import AzureJobStore
            # Azure arguments have the form "<account>:<namePrefix>".
            account, namePrefix = jobStoreArgs.split(':', 1)
            return AzureJobStore(account, namePrefix, config=config)

        elif jobStoreName == 'google':
            from toil.jobStores.googleJobStore import GoogleJobStore
            # Google arguments have the form "<projectID>:<namePrefix>"; the
            # constructor takes them in the opposite order.
            projectID, namePrefix = jobStoreArgs.split(':', 1)
            return GoogleJobStore(namePrefix, projectID, config=config)
        else:
            raise RuntimeError("Unknown job store implementation '%s'" %
                               jobStoreName)
Ejemplo n.º 5
0
    def loadOrCreateJobStore(locator, config=None):
        """
        Loads an existing jobStore if it already exists. Otherwise a new instance of a jobStore is
        created and returned.

        :param str locator: The location of the job store: either a path starting with
               '.' or '/' (treated as a file job store), or ``<name>:<args>`` where
               ``<name>`` is one of 'file', 'aws', 'azure' or 'google'.
        :param toil.common.Config config: passed through to the selected job store; see
               AbstractJobStore.__init__
        :return: an instance of a concrete subclass of AbstractJobStore
        :rtype: toil.jobStores.abstractJobStore.AbstractJobStore
        :raise RuntimeError: if the locator is empty, contains no colon, or names an
               unknown job store implementation
        """
        # startswith() is safe on an empty string, unlike indexing with [0]; an
        # empty locator then falls through to the RuntimeError below.
        if locator.startswith(('/', '.')):
            locator = 'file:' + locator

        try:
            jobStoreName, jobStoreArgs = locator.split(':', 1)
        except ValueError:
            raise RuntimeError('Invalid job store locator for proper formatting check locator '
                               'documentation for each job store.')

        # Implementation modules are imported only when their branch is selected.
        if jobStoreName == 'file':
            from toil.jobStores.fileJobStore import FileJobStore
            return FileJobStore(jobStoreArgs, config=config)

        elif jobStoreName == 'aws':
            from toil.jobStores.aws.jobStore import AWSJobStore
            return AWSJobStore.loadOrCreateJobStore(jobStoreArgs, config=config)

        elif jobStoreName == 'azure':
            from toil.jobStores.azureJobStore import AzureJobStore
            # Azure arguments have the form "<account>:<namePrefix>".
            account, namePrefix = jobStoreArgs.split(':', 1)
            return AzureJobStore(account, namePrefix, config=config)

        elif jobStoreName == 'google':
            from toil.jobStores.googleJobStore import GoogleJobStore
            # Google arguments have the form "<projectID>:<namePrefix>"; the
            # constructor takes them in the opposite order.
            projectID, namePrefix = jobStoreArgs.split(':', 1)
            return GoogleJobStore(namePrefix, projectID, config=config)
        else:
            raise RuntimeError("Unknown job store implementation '%s'" % jobStoreName)
Ejemplo n.º 6
0
    def getJobStore(locator):
        """
        Create an instance of the concrete job store implementation that matches the given locator.

        :param str locator: The location of the job store to be represented by the instance:
               either a path starting with '.' or '/' (treated as a file job store), or
               ``<name>:<rest>`` where ``<name>`` is one of 'file', 'aws', 'azure' or 'google'

        :return: an instance of a concrete subclass of AbstractJobStore
        :rtype: toil.jobStores.abstractJobStore.AbstractJobStore

        :raise RuntimeError: if the locator is empty, contains no colon, or names an
               unknown job store implementation
        """
        # startswith() is safe on an empty string, unlike indexing with [0]; an
        # empty locator then falls through to the RuntimeError below.
        if locator.startswith(('/', '.')):
            locator = 'file:' + locator

        try:
            name, rest = locator.split(':', 1)
        except ValueError:
            raise RuntimeError('Invalid job store locator syntax.')

        # Implementation modules are imported only when their branch is selected.
        if name == 'file':
            from toil.jobStores.fileJobStore import FileJobStore
            return FileJobStore(rest)

        elif name == 'aws':
            from toil.jobStores.aws.jobStore import AWSJobStore
            return AWSJobStore(rest)

        elif name == 'azure':
            from toil.jobStores.azureJobStore import AzureJobStore
            # The pieces are not used: the whole remainder is handed to the
            # constructor. The unpacking is kept so that a remainder without a
            # ':' separator still raises ValueError before construction.
            _account, _namePrefix = rest.split(':', 1)
            return AzureJobStore(rest)

        elif name == 'google':
            from toil.jobStores.googleJobStore import GoogleJobStore
            projectID, namePrefix = rest.split(':', 1)
            # This function takes no config, so none is forwarded (the previous
            # ``config=config`` referred to a name this function never defines).
            return GoogleJobStore(namePrefix, projectID)
        else:
            raise RuntimeError("Unknown job store implementation '%s'" % name)
Ejemplo n.º 7
0
 def _hashTestFile(self, url):
     """
     Return the hex MD5 digest of the local file that ``url`` refers to.

     :param str url: URL that is parsed and passed to
            ``FileJobStore._extractPathFromUrl`` to obtain a local file path
     :return: hexadecimal MD5 digest of the file's contents
     :rtype: str
     """
     localFilePath = FileJobStore._extractPathFromUrl(
         urlparse.urlparse(url))
     # Read raw bytes: hashlib needs bytes on Python 3, and binary mode avoids
     # any newline translation or decoding altering the digest.
     with open(localFilePath, 'rb') as f:
         return hashlib.md5(f.read()).hexdigest()
Ejemplo n.º 8
0
 def _createJobStore(self, config=None):
     """
     Build the file job store used by these tests.

     :param config: forwarded unchanged to the FileJobStore constructor
     :return: a FileJobStore constructed from ``self.namePrefix``
     """
     store = FileJobStore(self.namePrefix, config=config)
     return store
Ejemplo n.º 9
0
 def _createJobStore(self):
     """
     Build the file job store used by these tests.

     :return: a FileJobStore constructed from ``self.namePrefix``
     """
     store = FileJobStore(self.namePrefix)
     return store
Ejemplo n.º 10
0
class WorkerTests(ToilTest):
    """Test miscellaneous units of the worker."""

    def setUp(self):
        """Create an initialized file job store and matching config for each test."""
        super(WorkerTests, self).setUp()
        path = self._getTestJobStorePath()
        self.jobStore = FileJobStore(path)
        self.config = Config()
        self.config.jobStore = 'file:%s' % path
        self.jobStore.initialize(self.config)
        # Counter used to give every created job description a distinct name.
        self.jobNumber = 0

    @travis_test
    def testNextChainable(self):
        """Make sure chainable/non-chainable jobs are identified correctly."""
        def createTestJobDesc(memory, cores, disk, preemptable, checkpoint):
            """
            Create a JobDescription with no command (representing a Job that
            has already run) and return the JobDescription.

            A CheckpointJobDescription is created instead when ``checkpoint``
            is true. The description is assigned an ID and saved in
            ``self.jobStore`` before being returned.
            """
            name = 'job%d' % self.jobNumber
            self.jobNumber += 1

            descClass = CheckpointJobDescription if checkpoint else JobDescription
            jobDesc = descClass(requirements={
                'memory': memory,
                'cores': cores,
                'disk': disk,
                'preemptable': preemptable
            },
                                jobName=name)

            # Assign an ID
            self.jobStore.assignID(jobDesc)

            # Save and return the JobDescription
            return self.jobStore.create(jobDesc)

        for successorType in ['addChild', 'addFollowOn']:
            # Try with the branch point at both child and follow-on stages

            # Identical non-checkpoint jobs should be chainable.
            jobDesc1 = createTestJobDesc(1, 2, 3, True, False)
            jobDesc2 = createTestJobDesc(1, 2, 3, True, False)
            getattr(jobDesc1, successorType)(jobDesc2.jobStoreID)
            chainable = nextChainable(jobDesc1, self.jobStore, self.config)
            self.assertIsNotNone(chainable)
            self.assertEqual(jobDesc2.jobStoreID, chainable.jobStoreID)

            # Identical checkpoint jobs should not be chainable.
            jobDesc1 = createTestJobDesc(1, 2, 3, True, False)
            jobDesc2 = createTestJobDesc(1, 2, 3, True, True)
            getattr(jobDesc1, successorType)(jobDesc2.jobStoreID)
            self.assertIsNone(
                nextChainable(jobDesc1, self.jobStore, self.config))

            # If there is no child we should get nothing to chain.
            jobDesc1 = createTestJobDesc(1, 2, 3, True, False)
            self.assertIsNone(
                nextChainable(jobDesc1, self.jobStore, self.config))

            # If there are 2 or more children we should get nothing to chain.
            jobDesc1 = createTestJobDesc(1, 2, 3, True, False)
            jobDesc2 = createTestJobDesc(1, 2, 3, True, False)
            jobDesc3 = createTestJobDesc(1, 2, 3, True, False)
            getattr(jobDesc1, successorType)(jobDesc2.jobStoreID)
            getattr(jobDesc1, successorType)(jobDesc3.jobStoreID)
            self.assertIsNone(
                nextChainable(jobDesc1, self.jobStore, self.config))

            # If there is an increase in resource requirements we should get nothing to chain.
            reqs = {
                'memory': 1,
                'cores': 2,
                'disk': 3,
                'preemptable': True,
                'checkpoint': False
            }
            for increased_attribute in ('memory', 'cores', 'disk'):
                # The predecessor uses the current requirements; the successor
                # gets exactly one attribute bumped by one.
                jobDesc1 = createTestJobDesc(**reqs)
                reqs[increased_attribute] += 1
                jobDesc2 = createTestJobDesc(**reqs)
                getattr(jobDesc1, successorType)(jobDesc2.jobStoreID)
                self.assertIsNone(
                    nextChainable(jobDesc1, self.jobStore, self.config))

            # A change in preemptability from True to False should be disallowed.
            # The successor is a plain (non-checkpoint) job so that preemptability
            # is the only difference between the two jobs.
            jobDesc1 = createTestJobDesc(1, 2, 3, True, False)
            jobDesc2 = createTestJobDesc(1, 2, 3, False, False)
            getattr(jobDesc1, successorType)(jobDesc2.jobStoreID)
            self.assertIsNone(
                nextChainable(jobDesc1, self.jobStore, self.config))
Ejemplo n.º 11
0
 def _cleanUpExternalStore(url):
     """
     Delete the local file that ``url`` refers to.

     :param str url: URL that is parsed and passed to
            ``FileJobStore._extractPathFromUrl`` to obtain the path to remove
     """
     parsedUrl = urlparse.urlparse(url)
     os.remove(FileJobStore._extractPathFromUrl(parsedUrl))
Ejemplo n.º 12
0
 def _hashUrl(url):
     """
     Return the hex MD5 digest of the local file that ``url`` refers to.

     :param str url: URL that is parsed and passed to
            ``FileJobStore._extractPathFromUrl`` to obtain a local file path
     :return: hexadecimal MD5 digest of the file's contents
     :rtype: str
     """
     localFilePath = FileJobStore._extractPathFromUrl(urlparse.urlparse(url))
     # Read raw bytes: hashlib needs bytes on Python 3, and binary mode avoids
     # any newline translation or decoding altering the digest.
     with open(localFilePath, 'rb') as f:
         return hashlib.md5(f.read()).hexdigest()
Ejemplo n.º 13
0
class WorkerTests(ToilTest):
    """Test miscellaneous units of the worker."""

    def setUp(self):
        """Create an initialized file job store and matching config for each test."""
        super(WorkerTests, self).setUp()
        path = self._getTestJobStorePath()
        self.jobStore = FileJobStore(path)
        self.config = Config()
        self.config.jobStore = 'file:%s' % path
        self.jobStore.initialize(self.config)
        # Counter used to give every created job graph a distinct name/ID.
        self.jobGraphNumber = 0

    def testNextChainableJobGraph(self):
        """Make sure chainable/non-chainable jobs are identified correctly."""
        def createJobGraph(memory, cores, disk, preemptable, checkpoint):
            """Create a fake-ish Job and JobGraph pair, and return the
            jobGraph.

            The Job (carrying the ``checkpoint`` flag) is pickled into the job
            store, and its file store ID is embedded in the JobGraph's command.
            """
            name = 'jobGraph%d' % self.jobGraphNumber
            self.jobGraphNumber += 1

            job = Job()
            job.checkpoint = checkpoint
            with self.jobStore.writeFileStream() as (f, fileStoreID):
                pickle.dump(job, f, pickle.HIGHEST_PROTOCOL)
            command = '_toil %s fooCommand toil True' % fileStoreID
            jobGraph = JobGraph(command=command,
                                memory=memory,
                                cores=cores,
                                disk=disk,
                                unitName=name,
                                jobName=name,
                                preemptable=preemptable,
                                jobStoreID=name,
                                remainingRetryCount=1,
                                predecessorNumber=1)
            return self.jobStore.create(jobGraph)

        # Identical non-checkpoint jobs should be chainable.
        jobGraph1 = createJobGraph(1, 2, 3, True, False)
        jobGraph2 = createJobGraph(1, 2, 3, True, False)
        jobGraph1.stack = [[jobGraph2]]
        self.assertEqual(jobGraph2,
                         nextChainableJobGraph(jobGraph1, self.jobStore))

        # Identical checkpoint jobs should not be chainable.
        jobGraph1 = createJobGraph(1, 2, 3, True, False)
        jobGraph2 = createJobGraph(1, 2, 3, True, True)
        jobGraph1.stack = [[jobGraph2]]
        self.assertIsNone(nextChainableJobGraph(jobGraph1, self.jobStore))

        # If there is no child we should get nothing to chain.
        jobGraph1 = createJobGraph(1, 2, 3, True, False)
        jobGraph1.stack = []
        self.assertIsNone(nextChainableJobGraph(jobGraph1, self.jobStore))

        # If there are 2 or more children we should get nothing to chain.
        jobGraph1 = createJobGraph(1, 2, 3, True, False)
        jobGraph2 = createJobGraph(1, 2, 3, True, False)
        jobGraph3 = createJobGraph(1, 2, 3, True, False)
        jobGraph1.stack = [[jobGraph2, jobGraph3]]
        self.assertIsNone(nextChainableJobGraph(jobGraph1, self.jobStore))

        # If there is an increase in resource requirements we should get nothing to chain.
        reqs = {
            'memory': 1,
            'cores': 2,
            'disk': 3,
            'preemptable': True,
            'checkpoint': False
        }
        for increased_attribute in ('memory', 'cores', 'disk'):
            # The predecessor uses the current requirements; the successor
            # gets exactly one attribute bumped by one.
            jobGraph1 = createJobGraph(**reqs)
            reqs[increased_attribute] += 1
            jobGraph2 = createJobGraph(**reqs)
            jobGraph1.stack = [[jobGraph2]]
            self.assertIsNone(nextChainableJobGraph(jobGraph1, self.jobStore))

        # A change in preemptability from True to False should be disallowed.
        # The successor is a non-checkpoint job so that preemptability is the
        # only difference between the two jobs.
        jobGraph1 = createJobGraph(1, 2, 3, True, False)
        jobGraph2 = createJobGraph(1, 2, 3, False, False)
        jobGraph1.stack = [[jobGraph2]]
        self.assertIsNone(nextChainableJobGraph(jobGraph1, self.jobStore))
0
 def _cleanUpExternalStore(url):
     """
     Remove the file on the local file system that ``url`` resolves to.

     :param str url: URL whose parsed form is given to
            ``FileJobStore._extractPathFromUrl`` to find the file to delete
     """
     urlParts = urlparse.urlparse(url)
     pathToDelete = FileJobStore._extractPathFromUrl(urlParts)
     os.remove(pathToDelete)