def __updateSandBox( self, jobid, output ):
    """Copy ``output`` with scp and upload it as the output sandbox of ``jobid``.

    Steps: call ``self.sshConnect.scpCall`` on ``output``, resolve the single
    path through ``self.__resolveOutputSandboxFiles``, record any missing
    files as a job parameter, then upload the resolved files with
    ``self.sandboxClient.uploadFilesAsSandboxForJob``.

    NOTE(review): a later definition in this file uses the same name; if both
    live in the same class the later one replaces this one -- confirm.

    :param jobid: job the sandbox belongs to; a falsy value skips the upload.
    :param output: path of the output file, passed as both path arguments of
        the scp call and used as the only sandbox entry.
    :return: the string ``"OK"``, or an ``S_ERROR`` structure when the sandbox
        files cannot be resolved or when the upload fails and the result has
        no ``SandboxFileName`` to fail over to.
    """
    result = self.sshConnect.scpCall( 100, output, output, False )
    if not result['OK']:
        # Best effort: the failure is only logged, resolution is still attempted.
        self.log.error( 'Error to get the data from BigData Software DFS:', result )

    resolvedSandbox = self.__resolveOutputSandboxFiles( [output] )
    if not resolvedSandbox['OK']:
        self.log.warn( 'Output sandbox file resolution failed:' )
        self.log.warn( resolvedSandbox['Message'] )
        self.__report( 'Failed', 'Resolving Output Sandbox' )
        # An error result carries 'Message' and presumably no 'Value'; stop
        # here instead of failing on the lookups below.
        return S_ERROR( resolvedSandbox['Message'] )

    fileList = resolvedSandbox['Value']['Files']
    missingFiles = resolvedSandbox['Value']['Missing']
    if missingFiles:
        self.jobReport.setJobParameter( 'OutputSandboxMissingFiles', ', '.join( missingFiles ), sendFlag = False )

    if not ( fileList and jobid ):
        # Nothing to upload.
        return "OK"

    self.outputSandboxSize = getGlobbedTotalSize( fileList )
    self.log.info( 'Attempting to upload Sandbox with limit:', self.sandboxSizeLimit )
    # Original annotation on the size limit: 1024*1024*10
    result = self.sandboxClient.uploadFilesAsSandboxForJob( fileList, jobid, 'Output', self.sandboxSizeLimit )

    if result['OK']:
        # Do not overwrite in case of Error
        if not self.failedFlag:
            self.__report( 'Completed', 'Output Sandbox Uploaded' )
        self.log.info( 'Sandbox uploaded successfully' )
        return "OK"

    self.log.error( 'Output sandbox upload failed with message', result['Message'] )
    if 'SandboxFileName' not in result:
        self.log.info( 'Could not get SandboxFileName to attempt upload to Grid storage' )
        return S_ERROR( 'Output sandbox upload failed and no file name supplied for failover to Grid storage' )

    # Failover: register the sandbox file as output data instead.
    outputSandboxData = result['SandboxFileName']
    self.log.info( 'Attempting to upload %s as output data' % ( outputSandboxData ) )
    # NOTE(review): outputData is not defined anywhere in this method; unless
    # it is a module-level list this line raises NameError -- confirm.
    outputData.append( outputSandboxData )
    self.jobReport.setJobParameter( 'OutputSandbox', 'Sandbox uploaded to grid storage', sendFlag = False )
    self.jobReport.setJobParameter( 'OutputSandboxLFN', self.__getLFNfromOutputFile( outputSandboxData )[0], sendFlag = False )
    return "OK"
def __updateSandBox(self, jobid, software, version, hll, hllversion, cli):
    """Fetch the job output through ``cli`` and upload it as the output sandbox.

    The source and destination paths are built from ``self.__tmpSandBoxDir``,
    ``jobid`` and the job name obtained via ``BigDataDB.getJobIDInfo`` and
    ``self.__getJobName``. For hadoop ``hdv1``/``hdv2`` without a high level
    language the data is first fetched with ``cli.getData``; ``cli.getdata``
    is then called on the destination path in every case. The files found
    under the destination are resolved and uploaded with
    ``self.sandboxClient.uploadFilesAsSandboxForJob``.

    :param jobid: job the sandbox belongs to; a falsy value skips the upload.
    :param software: software name; only ``"hadoop"`` triggers the first fetch.
    :param version: software version; ``"hdv1"`` and ``"hdv2"`` are handled.
    :param hll: high level language name; only ``"none"`` triggers the first fetch.
    :param hllversion: not used by this method.
    :param cli: client object providing ``getData`` and ``getdata``.
    :return: the string ``"OK"``, or an ``S_ERROR`` structure when the sandbox
        files cannot be resolved or when the upload fails and the result has
        no ``SandboxFileName`` to fail over to.
    """
    jobInfo = BigDataDB.getJobIDInfo(jobid)
    jobDir = self.__tmpSandBoxDir + str(jobid)
    outputName = self.__getJobName(jobInfo[0][0]).replace(" ", "") + "_" + str(jobid)
    source = jobDir + "/InputSandbox" + str(jobid) + "/" + outputName
    dest = jobDir + "/" + outputName

    if software == "hadoop" and version in ("hdv1", "hdv2") and hll == "none":
        result = cli.getData(source, dest)
        # Only inspect the result when the fetch was attempted; the previous
        # integer placeholder could not be subscripted for other combinations.
        if not result["OK"]:
            self.log.error("Error to get the data from BigData Software DFS:", result)

    result = cli.getdata(dest, dest)
    if not result["OK"]:
        # Best effort: the failure is only logged, resolution is still attempted.
        self.log.error("Error to get the data from BigData Cluster to DIRAC:", result)

    outputSandbox = self.get_filepaths(dest)
    resolvedSandbox = self.__resolveOutputSandboxFiles(outputSandbox)
    if not resolvedSandbox["OK"]:
        self.log.warn("Output sandbox file resolution failed:")
        self.log.warn(resolvedSandbox["Message"])
        self.__report("Failed", "Resolving Output Sandbox")
        # An error result carries "Message" and presumably no "Value"; stop
        # here instead of failing on the lookups below.
        return S_ERROR(resolvedSandbox["Message"])

    self.fileList = resolvedSandbox["Value"]["Files"]
    missingFiles = resolvedSandbox["Value"]["Missing"]
    if missingFiles:
        self.jobReport.setJobParameter("OutputSandboxMissingFiles", ", ".join(missingFiles), sendFlag=False)

    if not (self.fileList and jobid):
        # Nothing to upload.
        return "OK"

    self.outputSandboxSize = getGlobbedTotalSize(self.fileList)
    self.log.info("Attempting to upload Sandbox with limit:", self.sandboxSizeLimit)
    # Original annotation on the size limit: 1024*1024*10
    result = self.sandboxClient.uploadFilesAsSandboxForJob(
        self.fileList, jobid, "Output", self.sandboxSizeLimit
    )

    if result["OK"]:
        # Do not overwrite in case of Error
        if not self.failedFlag:
            self.__report("Completed", "Output Sandbox Uploaded")
        self.log.info("Sandbox uploaded successfully")
        return "OK"

    self.log.error("Output sandbox upload failed with message", result["Message"])
    if "SandboxFileName" not in result:
        self.log.info("Could not get SandboxFileName to attempt upload to Grid storage")
        return S_ERROR(
            "Output sandbox upload failed and no file name supplied for failover to Grid storage"
        )

    # Failover: register the sandbox file as output data instead.
    outputSandboxData = result["SandboxFileName"]
    self.log.info("Attempting to upload %s as output data" % (outputSandboxData))
    # NOTE(review): outputData is not defined anywhere in this method; unless
    # it is a module-level list this line raises NameError -- confirm.
    outputData.append(outputSandboxData)
    self.jobReport.setJobParameter("OutputSandbox", "Sandbox uploaded to grid storage", sendFlag=False)
    self.jobReport.setJobParameter(
        "OutputSandboxLFN", self.__getLFNfromOutputFile(outputSandboxData)[0], sendFlag=False
    )
    return "OK"
def __updateInteractiveSandBox(self, jobid, software, version, hll, hllversion, cli):
    """Collect the ``*_out`` results of an interactive job and upload them.

    The job's ``InputSandbox<jobid>`` directory is removed with
    ``cli.delHadoopData``, ``cli.getdata`` is called for the ``*_out`` entries
    of the job directory, and every file found under that directory is
    resolved and uploaded as the output sandbox with
    ``self.sandboxClient.uploadFilesAsSandboxForJob``.

    :param jobid: job the sandbox belongs to; a falsy value skips the upload.
    :param software: not used by this method.
    :param version: not used by this method.
    :param hll: not used by this method.
    :param hllversion: not used by this method.
    :param cli: client object providing ``delHadoopData`` and ``getdata``.
    :return: the string ``"OK"``, or an ``S_ERROR`` structure when the sandbox
        files cannot be resolved or when the upload fails and the result has
        no ``SandboxFileName`` to fail over to.
    """
    jobDir = self.__tmpSandBoxDir + str(jobid)
    source = jobDir + "/*_out"
    dest = jobDir

    # Delete content of InputSandbox
    result = cli.delHadoopData(jobDir + "/InputSandbox" + str(jobid))
    self.log.debug("ATENTION::Deleting InputSandBox Contain:", result)

    # NOTE(review): the arguments are passed as (dest, source); confirm this
    # matches the parameter order expected by cli.getdata.
    result = cli.getdata(dest, source)
    self.log.debug("Step 0:getting data from hadoop:", result)
    if not result["OK"]:
        # Best effort: the failure is only logged, resolution is still attempted.
        self.log.error("Error to get the data from BigData Cluster to DIRAC:", result)

    self.log.debug("Step:1:GetFilePaths:")
    outputSandbox = self.get_filepaths(jobDir)
    self.log.debug("Step:2:OutputSandBox:", jobDir)
    self.log.debug("Step:2:OutputSandBox:", outputSandbox)

    resolvedSandbox = self.__resolveOutputSandboxFiles(outputSandbox)
    self.log.debug("Step:3:ResolveSandbox:", resolvedSandbox)
    if not resolvedSandbox["OK"]:
        self.log.warn("Output sandbox file resolution failed:")
        self.log.warn(resolvedSandbox["Message"])
        self.__report("Failed", "Resolving Output Sandbox")
        # An error result carries "Message" and presumably no "Value"; stop
        # here instead of failing on the lookups below.
        return S_ERROR(resolvedSandbox["Message"])

    self.fileList = resolvedSandbox["Value"]["Files"]
    missingFiles = resolvedSandbox["Value"]["Missing"]
    if missingFiles:
        self.jobReport.setJobParameter("OutputSandboxMissingFiles", ", ".join(missingFiles), sendFlag=False)

    if not (self.fileList and jobid):
        # Nothing to upload.
        return "OK"

    self.outputSandboxSize = getGlobbedTotalSize(self.fileList)
    self.log.info("Attempting to upload Sandbox with limit:", self.sandboxSizeLimit)
    # Original annotation on the size limit: 1024*1024*10
    result = self.sandboxClient.uploadFilesAsSandboxForJob(
        self.fileList, jobid, "Output", self.sandboxSizeLimit
    )

    if result["OK"]:
        # Do not overwrite in case of Error
        if not self.failedFlag:
            self.__report("Completed", "Output Sandbox Uploaded")
        self.log.info("Sandbox uploaded successfully")
        return "OK"

    self.log.error("Output sandbox upload failed with message", result["Message"])
    if "SandboxFileName" not in result:
        self.log.info("Could not get SandboxFileName to attempt upload to Grid storage")
        return S_ERROR(
            "Output sandbox upload failed and no file name supplied for failover to Grid storage"
        )

    # Failover: register the sandbox file as output data instead.
    outputSandboxData = result["SandboxFileName"]
    self.log.info("Attempting to upload %s as output data" % (outputSandboxData))
    # NOTE(review): outputData is not defined anywhere in this method; unless
    # it is a module-level list this line raises NameError -- confirm.
    outputData.append(outputSandboxData)
    self.jobReport.setJobParameter("OutputSandbox", "Sandbox uploaded to grid storage", sendFlag=False)
    self.jobReport.setJobParameter(
        "OutputSandboxLFN", self.__getLFNfromOutputFile(outputSandboxData)[0], sendFlag=False
    )
    return "OK"