def save(self):
    attributes, numNewContentChunks = self.toItem()
    # False stands for absence
    expected = ['version', False if self.previousVersion is None else self.previousVersion]
    try:
        for attempt in retry_sdb():
            with attempt:
                assert self.outer.filesDomain.put_attributes(item_name=self.fileID,
                                                             attributes=attributes,
                                                             expected_value=expected)
        # The conditional put succeeded, so the old S3 version can be cleaned up.
        if self.previousVersion:
            self.outer.filesBucket.delete_key(self.fileID, version_id=self.previousVersion)
        self._previousVersion = self._version
        if numNewContentChunks < self._numContentChunks:
            # Drop chunk attributes that the new content no longer uses.
            residualChunks = xrange(numNewContentChunks, self._numContentChunks)
            attributes = [self._chunkName(i) for i in residualChunks]
            for attempt in retry_sdb():
                with attempt:
                    self.outer.filesDomain.delete_attributes(self.fileID, attributes=attributes)
        self._numContentChunks = numNewContentChunks
    except SDBResponseError as e:
        if e.error_code == 'ConditionalCheckFailed':
            raise ConcurrentFileModificationException(self.fileID)
        else:
            raise

def delete(self, jobStoreID):
    # Remove the job record itself, then every file registered as owned by it.
    log.debug("Deleting job %s", jobStoreID)
    for attempt in retry_sdb():
        with attempt:
            self.jobsDomain.delete_attributes(item_name=jobStoreID)
    items = None
    for attempt in retry_sdb():
        with attempt:
            items = list(self.filesDomain.select(
                consistent_read=True,
                query="select version from `%s` where ownerID='%s'" % (
                    self.filesDomain.name, jobStoreID)))
    assert items is not None
    if items:
        log.debug("Deleting %d file(s) associated with job %s", len(items), jobStoreID)
        # SDB caps the number of items per batch delete, so chunk the work.
        n = self.itemsPerBatchDelete
        batches = [items[i:i + n] for i in range(0, len(items), n)]
        for batch in batches:
            itemsDict = {item.name: None for item in batch}
            for attempt in retry_sdb():
                with attempt:
                    self.filesDomain.batch_delete_attributes(itemsDict)
        for item in items:
            version = item.get('version')
            # Versioned keys need the version ID; unversioned ones are deleted outright.
            if version:
                self.filesBucket.delete_key(key_name=item.name, version_id=version)
            else:
                self.filesBucket.delete_key(key_name=item.name)

def save(self):
    attributes, numNewContentChunks = self.toItem()
    # False stands for absence
    expected = ['version', False if self.previousVersion is None else self.previousVersion]
    try:
        for attempt in retry_sdb():
            with attempt:
                assert self.outer.filesDomain.put_attributes(item_name=self.fileID,
                                                             attributes=attributes,
                                                             expected_value=expected)
        # clean up the old version of the file if necessary and safe
        if self.previousVersion and (self.previousVersion != self.version):
            self.outer.filesBucket.delete_key(self.fileID, version_id=self.previousVersion)
        self._previousVersion = self._version
        if numNewContentChunks < self._numContentChunks:
            residualChunks = xrange(numNewContentChunks, self._numContentChunks)
            attributes = [self._chunkName(i) for i in residualChunks]
            for attempt in retry_sdb():
                with attempt:
                    self.outer.filesDomain.delete_attributes(self.fileID, attributes=attributes)
        self._numContentChunks = numNewContentChunks
    except SDBResponseError as e:
        if e.error_code == 'ConditionalCheckFailed':
            raise ConcurrentFileModificationException(self.fileID)
        else:
            raise

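# Illustrative usage sketch (not part of the original code): how a caller might
# handle the write conflict that save() detects via its conditional put.
# `fileInfoClass` stands for whatever info class defines the load()/save()
# methods above; the reload-on-conflict policy shown here is an assumption.
def _sketch_saveWithConflictHandling(fileInfoClass, jobStoreFileID):
    info = fileInfoClass.load(jobStoreFileID)
    try:
        info.save()  # conditional put on the 'version' attribute
    except ConcurrentFileModificationException:
        # Another writer updated the item after we loaded it; reload and let
        # the caller decide whether to merge, retry or give up.
        return fileInfoClass.load(jobStoreFileID)
    return info
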
def __init__(self, region, namePrefix, config=None, partSize=defaultPartSize):
    """
    Create a new job store in AWS or load an existing one from there.

    :param region: the AWS region to create the job store in, e.g. 'us-west-2'

    :param namePrefix: S3 bucket names and SDB tables will be prefixed with this

    :param config: the config object to be written to this job store. Must be None for
           existing job stores. Must not be None for new job stores.
    """
    log.debug("Instantiating %s for region %s and name prefix '%s'",
              self.__class__, region, namePrefix)
    self.region = region
    self.namePrefix = namePrefix
    self.jobsDomain = None
    self.filesDomain = None
    self.filesBucket = None
    self.db = self._connectSimpleDB()
    self.s3 = self._connectS3()
    self.partSize = partSize

    # Check global registry domain for existence of this job store. The first time this is
    # being executed in an AWS account, the registry domain will be created on the fly.
    create = config is not None
    self.registry_domain = self._getOrCreateDomain('toil-registry')
    for attempt in retry_sdb():
        with attempt:
            attributes = self.registry_domain.get_attributes(item_name=namePrefix,
                                                             attribute_name='exists',
                                                             consistent_read=True)
    exists = strict_bool(attributes.get('exists', str(False)))
    self._checkJobStoreCreation(create, exists, region + ":" + namePrefix)

    def qualify(name):
        assert len(name) <= self.maxNameLen
        return self.namePrefix + self.nameSeparator + name

    self.jobsDomain = self._getOrCreateDomain(qualify('jobs'))
    self.filesDomain = self._getOrCreateDomain(qualify('files'))
    self.filesBucket = self._getOrCreateBucket(qualify('files'), versioning=True)

    # Now register this job store
    for attempt in retry_sdb():
        with attempt:
            self.registry_domain.put_attributes(item_name=namePrefix,
                                                attributes=dict(exists='True'))

    super(AWSJobStore, self).__init__(config=config)
    self.sseKeyPath = self.config.sseKey

@classmethod
def load(cls, jobStoreFileID):
    for attempt in retry_sdb():
        with attempt:
            self = cls.fromItem(
                cls.outer.filesDomain.get_attributes(item_name=jobStoreFileID,
                                                     consistent_read=True))
            return self

def exists(self, jobStoreID):
    for attempt in retry_sdb():
        with attempt:
            return bool(self.jobsDomain.get_attributes(item_name=jobStoreID,
                                                       attribute_name=[],
                                                       consistent_read=True))

def jobs(self):
    result = None
    for attempt in retry_sdb():
        with attempt:
            result = list(self.jobsDomain.select(
                consistent_read=True,
                query="select * from `%s`" % self.jobsDomain.name))
    assert result is not None
    for jobItem in result:
        yield AWSJob.fromItem(jobItem)

def delete(self):
    store = self.outer
    if self.previousVersion is not None:
        for attempt in retry_sdb():
            with attempt:
                # Only delete the item if its version hasn't changed since it was loaded.
                store.filesDomain.delete_attributes(
                    self.fileID, expected_values=['version', self.previousVersion])
        if self.previousVersion:
            store.filesBucket.delete_key(key_name=self.fileID,
                                         version_id=self.previousVersion)

def create(self, command, memory, cores, disk, predecessorNumber=0):
    jobStoreID = self._newJobID()
    log.debug("Creating job %s for '%s'",
              jobStoreID, '<no command>' if command is None else command)
    job = AWSJob(jobStoreID=jobStoreID,
                 command=command, memory=memory, cores=cores, disk=disk,
                 remainingRetryCount=self._defaultTryCount(),
                 logJobStoreFileID=None,
                 predecessorNumber=predecessorNumber)
    for attempt in retry_sdb():
        with attempt:
            assert self.jobsDomain.put_attributes(*job.toItem())
    return job

def load(self, jobStoreID):
    item = None
    for attempt in retry_sdb():
        with attempt:
            item = self.jobsDomain.get_attributes(jobStoreID, consistent_read=True)
    if not item:
        raise NoSuchJobException(jobStoreID)
    job = AWSJob.fromItem(item)
    if job is None:
        raise NoSuchJobException(jobStoreID)
    log.debug("Loaded job %s", jobStoreID)
    return job

def create(self, command, memory, cores, disk, preemptable, predecessorNumber=0):
    jobStoreID = self._newJobID()
    log.debug("Creating job %s for '%s'",
              jobStoreID, '<no command>' if command is None else command)
    job = AWSJob(jobStoreID=jobStoreID,
                 command=command, memory=memory, cores=cores, disk=disk,
                 preemptable=preemptable,
                 remainingRetryCount=self._defaultTryCount(),
                 logJobStoreFileID=None,
                 predecessorNumber=predecessorNumber)
    for attempt in retry_sdb():
        with attempt:
            assert self.jobsDomain.put_attributes(*job.toItem())
    return job

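# Illustrative call (not from the original code): creating a job record with
# hypothetical resource figures. `store` is assumed to be a fully constructed
# job store instance as set up in __init__ above.
def _sketch_createJob(store):
    return store.create(command='echo hello', memory=2 * 1024 ** 3, cores=1,
                        disk=2 * 1024 ** 3, preemptable=False)
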
def _readStatsAndLogging(self, callback, ownerId):
    items = None
    for attempt in retry_sdb():
        with attempt:
            items = list(self.filesDomain.select(
                consistent_read=True,
                query="select * from `%s` where ownerID='%s'" % (
                    self.filesDomain.name, str(ownerId))))
    assert items is not None
    for item in items:
        info = self.FileInfo.fromItem(item)
        with info.downloadStream() as readable:
            callback(readable)
        yield info

def _getOrCreateDomain(self, domain_name):
    """
    Return the boto Domain object representing the SDB domain with the given name. If the
    domain does not exist it will be created.

    :param domain_name: the unqualified name of the domain to be created

    :rtype: Domain
    """
    try:
        return self.db.get_domain(domain_name)
    except SDBResponseError as e:
        if no_such_domain(e):
            for attempt in retry_sdb(retry_while=sdb_unavailable):
                with attempt:
                    return self.db.create_domain(domain_name)
        else:
            raise

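# Illustrative sketch (an assumption, not the actual retry_sdb helper): the calls
# above follow the shape `for attempt in retry_sdb(): with attempt: ...`, i.e. a
# generator that yields context managers and stops yielding once an attempt
# finishes without raising. A minimal helper with that shape could look like
# this; the real implementation's error filtering, back-off and attempt count
# may differ.
import time
from contextlib import contextmanager

def _sketch_retry(tries=3, delay=1.0, retry_while=lambda e: True):
    state = dict(succeeded=False)

    @contextmanager
    def attempt(last):
        try:
            yield
        except Exception as e:  # the real helper presumably filters SDB errors only
            if last or not retry_while(e):
                raise
            time.sleep(delay)
        else:
            state['succeeded'] = True

    for i in range(tries):
        if state['succeeded']:
            return
        yield attempt(last=i == tries - 1)
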
def update(self, job):
    log.debug("Updating job %s", job.jobStoreID)
    for attempt in retry_sdb():
        with attempt:
            assert self.jobsDomain.put_attributes(*job.toItem())