def process(self, file):
    """Upload one local file to a grid SE with ``lcg-cr`` and register it in the LFC.

    ``file`` is a URI whose path component locates the file on local disk.
    On success a ``LCGFileIndex`` describing the new replica is recorded via
    ``__appendResult__`` and True is returned.  A failed transfer returns
    False; a failed LFC directory creation returns None (both falsy, so
    callers testing truthiness see the same outcome either way).
    """
    # The path component of the URI is the local file to ship.
    local_path = urlparse(file)[2]
    fsize = os.path.getsize(local_path)
    fname = os.path.basename(local_path)
    fpath = os.path.abspath(local_path)
    md5sum = get_md5sum(fpath, ignoreGzipTimestamp=True)

    # decide number of parallel streams to be used:
    # one stream per 10 GiB of payload, clamped to the range [1, 8]
    nbstream = int((fsize * 1.0) / (10.0 * 1024 * 1024 * 1024))
    nbstream = min(max(nbstream, 1), 8)

    cmd = 'lcg-cr -t 180 --vo %s -n %d' % (self.cacheObj.vo, nbstream)

    # BUGFIX: compare against None with identity, not equality ("!= None")
    if self.cacheObj.se is not None:
        cmd = cmd + ' -d %s' % self.cacheObj.se
    if self.cacheObj.se_type == 'srmv2' and self.cacheObj.srm_token:
        cmd = cmd + ' -D srmv2 -s %s' % self.cacheObj.srm_token

    # specify the physical location
    cmd = cmd + ' -P %s/ganga.%s/%s' % (self.cacheObj.se_rpath, self.dirname, fname)

    # specify the logical filename
    # NOTE: here we assume the root dir for VO is /grid/<voname>
    lfc_dir = '/grid/%s/ganga.%s' % (self.cacheObj.vo, self.dirname)
    if not self.cacheObj.__lfc_mkdir__(shell, lfc_dir):
        # NOTE(review): 'shell' is resolved from the enclosing scope --
        # TODO confirm it is always bound when process() is invoked.
        self.cacheObj.logger.warning('cannot create LFC directory: %s' % lfc_dir)
        return None

    cmd = cmd + ' -l %s/%s %s' % (lfc_dir, fname, file)
    rc, output, m = self.cacheObj.__cmd_retry_loop__(shell, cmd, self.cacheObj.max_try)
    if rc != 0:
        return False

    # lcg-cr prints the GUID of the created replica on success; raw string
    # avoids the invalid "\S" escape sequence of a plain literal.
    match = re.search(r'(guid:\S+)', output)
    if not match:
        return False

    fidx = LCGFileIndex()
    fidx.id = match.group(1)
    fidx.name = fname
    fidx.md5sum = md5sum
    fidx.lfc_host = self.cacheObj.lfc_host
    fidx.local_fpath = fpath
    self.__appendResult__(file, fidx)
    return True
def process(self, file):
    """Transfer one local file to ``destURI`` via ``uberftp`` and record its index.

    ``file`` is a URI whose path component points at the local file.  On
    success a ``GridftpFileIndex`` is stored through ``__appendResult__``
    and True is returned; a failed transfer logs the command output and
    returns False.
    """
    # The URI's path component locates the file on the local disk.
    src_path = urlparse(file)[2]
    src_size = os.path.getsize(src_path)
    src_name = os.path.basename(src_path)
    src_abs = os.path.abspath(src_path)
    checksum = get_md5sum(src_abs, ignoreGzipTimestamp=True)

    # decide number of parallel streams to be used:
    # one per 10 GiB, clamped between 1 and 8
    streams = int((src_size * 1.0) / (10.0 * 1024 * 1024 * 1024))
    if streams < 1:
        streams = 1
    if streams > 8:
        streams = 8

    dest_uri = '%s/%s' % (destURI, src_name)

    # assemble the upload command; -c only makes sense for more than one stream
    pieces = ['uberftp']
    if streams > 1:
        pieces.append('-c %d' % streams)
    pieces.append('file:%s %s' % (src_abs, dest_uri))
    cmd = ' '.join(pieces)

    rc, output, m = self.cacheObj.__cmd_retry_loop__(shell, cmd, self.cacheObj.max_try)
    if rc != 0:
        self.cacheObj.logger.error(output)
        return False

    fidx = GridftpFileIndex()
    fidx.id = dest_uri
    fidx.name = src_name
    fidx.md5sum = checksum
    fidx.attributes['fpath'] = src_abs
    self.__appendResult__(file, fidx)
    return True
def __check_and_prestage_inputfile__(self, file):
    """Prestage a local input file to a grid SE when it exceeds BoundSandboxLimit.

    The argument is a path of the local file.  Returns a dictionary
    describing how to reach the file:

        idx = {'lfc_host': lfc_host,
               'local': [the local file paths],
               'remote': {'fname1': 'remote index1',
                          'fname2': 'remote index2', ...}}

    Returns None when prestaging fails.  If a file with the same md5sum
    was uploaded previously, the existing remote index is reused and no
    new upload is performed.
    """
    idx = {'lfc_host': '', 'local': [], 'remote': {}}

    job = self.getJobObject()

    # collect previously uploaded files: the master job's sandbox cache
    # first (if any), then this job's own cache once $LFC_HOST is set up
    uploadedFiles = []
    if job.master:
        uploadedFiles += job.master.backend.sandboxcache.get_cached_files()
    self.__setup_sandboxcache__(job)
    uploadedFiles += self.sandboxcache.get_cached_files()

    # for LCGSandboxCache, take the lfc_host specified in the sandboxcache
    # object (it mirrors the local grid shell env unless set exclusively);
    # in general, query it from the Grid object for the middleware in use
    lfc_host = None
    if self.sandboxcache._name == 'LCGSandboxCache':
        lfc_host = self.sandboxcache.lfc_host
    if not lfc_host:
        lfc_host = grids[self.sandboxcache.middleware.upper()].__get_lfc_host__()
    idx['lfc_host'] = lfc_host

    abspath = os.path.abspath(file)
    if not os.path.getsize(abspath) > config['BoundSandboxLimit']:
        # small enough to travel inside the input sandbox
        idx['local'].append(abspath)
        return idx

    # oversized: reuse an earlier upload with matching md5sum when possible
    md5sum = get_md5sum(abspath, ignoreGzipTimestamp=True)
    cached = next((uf for uf in uploadedFiles if uf.md5sum == md5sum), None)
    if cached is not None:
        # the same file has been uploaded to the iocache
        idx['remote'][os.path.basename(file)] = cached.id
        return idx

    logger.warning(
        'The size of %s is larger than the sandbox limit (%d byte). Please wait while pre-staging ...'
        % (file, config['BoundSandboxLimit']))
    if not self.sandboxcache.upload([abspath]):
        logger.error('Oversized sandbox not successfully pre-staged')
        return None
    remote_sandbox = self.sandboxcache.get_cached_files()[-1]
    idx['remote'][remote_sandbox.name] = remote_sandbox.id
    return idx
def __check_and_prestage_inputfile__(self, file):
    """Check an input file against BoundSandboxLimit and prestage it if needed.

    The argument is a path of the local file.  Returns a dictionary with
    the information needed to refer to the file afterwards:

        idx = {'lfc_host': lfc_host,
               'local': [the local file paths],
               'remote': {'fname1': 'remote index1',
                          'fname2': 'remote index2', ...}}

    Returns None if prestaging failed.  A file whose md5sum matches an
    earlier upload is not uploaded again; the previous remote index is
    returned instead.
    """
    index = {'lfc_host': '', 'local': [], 'remote': {}}

    job = self.getJobObject()

    # gather every file already uploaded: the master job's cache first,
    # then (after setting up $LFC_HOST) this job's own sandbox cache
    known_uploads = []
    if job.master:
        known_uploads += job.master.backend.sandboxcache.get_cached_files()
    self.__setup_sandboxcache__(job)
    known_uploads += self.sandboxcache.get_cached_files()

    # LCGSandboxCache carries its own lfc_host (same as the local grid
    # shell env unless set exclusively); otherwise ask the Grid object
    lfc_host = None
    if self.sandboxcache._name == 'LCGSandboxCache':
        lfc_host = self.sandboxcache.lfc_host
    if not lfc_host:
        lfc_host = Grid.__get_lfc_host__()
    index['lfc_host'] = lfc_host

    full_path = os.path.abspath(file)
    oversized = os.path.getsize(full_path) > config['BoundSandboxLimit']

    if not oversized:
        index['local'].append(full_path)
        return index

    digest = get_md5sum(full_path, ignoreGzipTimestamp=True)
    for previous in known_uploads:
        if previous.md5sum == digest:
            # identical content was uploaded before: just point at it
            index['remote'][os.path.basename(file)] = previous.id
            return index

    logger.warning(
        'The size of %s is larger than the sandbox limit (%d byte). Please wait while pre-staging ...'
        % (file, config['BoundSandboxLimit']))
    if self.sandboxcache.upload([full_path]):
        newest = self.sandboxcache.get_cached_files()[-1]
        index['remote'][newest.name] = newest.id
        return index

    logger.error('Oversized sandbox not successfully pre-staged')
    return None