def execute(self, slot, subindex, ignored_roi, result):
    configFilePath = self.ConfigFilePath.value
    config = parseClusterConfigFile(configFilePath)

    blockwiseFileset = self._primaryBlockwiseFileset

    # Check axis compatibility
    inputAxes = list(self.Input.meta.getTaggedShape().keys())
    outputAxes = list(blockwiseFileset.description.axes)
    assert set(inputAxes) == set(outputAxes), \
        "Output dataset has the wrong set of axes. Input axes: {}, Output axes: {}".format(
            "".join(inputAxes), "".join(outputAxes))

    roiString = self.RoiString.value
    roi = Roi.loads(roiString)
    if len(roi.start) != len(self.Input.meta.shape):
        assert False, \
            "Task roi: {} is not valid for this input. Did the master launch this task correctly?".format(roiString)

    logger.info("Executing for roi: {}".format(roi))

    if config.use_node_local_scratch:
        assert False, "FIXME."

    # Each task is responsible for exactly one full block of the output dataset.
    assert (blockwiseFileset.getEntireBlockRoi(roi.start)[1] == roi.stop).all(), \
        "Each task must execute exactly one full block. ({},{}) is not a valid block roi.".format(
            roi.start, roi.stop)
    assert self.Input.ready()

    # Convert the task subrequest shape dict into a shape for this dataset (and axisordering).
    # (A list comprehension, not map(), so the result is a real sequence in Python 3.)
    subrequest_shape = [config.task_subrequest_shape[tag.key] for tag in self.Input.meta.axistags]

    primary_subrequest_shape = self._primaryBlockwiseFileset.description.sub_block_shape
    if primary_subrequest_shape is not None:
        # If the output dataset specified a sub_block_shape, override the cluster config
        subrequest_shape = primary_subrequest_shape

    with Timer() as computeTimer:
        # Stream the data out to disk.
        streamer = BigRequestStreamer(self.Input,
                                      (roi.start, roi.stop),
                                      subrequest_shape,
                                      config.task_parallel_subrequests)
        streamer.progressSignal.subscribe(self.progressSignal)
        streamer.resultSignal.subscribe(self._handlePrimaryResultBlock)
        streamer.execute()

        # Now the block is ready. Update the status.
        blockwiseFileset.setBlockStatus(roi.start, BlockwiseFileset.BLOCK_AVAILABLE)

    logger.info("Finished task in {} seconds".format(computeTimer.seconds()))
    result[0] = True
    return result
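# A minimal sketch (with hypothetical axis keys and sizes, not values taken
# from any real cluster config) of the subrequest-shape conversion performed
# in execute() above: the config stores one size per axis key, and the
# dataset's axistag ordering determines the final shape.
def _example_subrequest_shape():
    task_subrequest_shape = {"t": 1, "x": 256, "y": 256, "z": 32, "c": 1}  # hypothetical sizes
    axis_order = "txyzc"  # hypothetical dataset axis ordering
    # Same conversion as in execute(), minus the axistags objects:
    return [task_subrequest_shape[key] for key in axis_order]  # -> [1, 256, 256, 32, 1]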
def _prepareTaskInfos(self, roiList):
    # Divide up the workload into large pieces
    logger.info("Dividing into {} node jobs.".format(len(roiList)))

    taskInfos = collections.OrderedDict()
    for roiIndex, roi in enumerate(roiList):
        roi = (tuple(roi[0]), tuple(roi[1]))
        taskInfo = OpClusterize.TaskInfo()
        taskInfo.subregion = SubRegion(None, start=roi[0], stop=roi[1])

        taskName = "J{:02}".format(roiIndex)

        commandArgs = []
        commandArgs.append("--option_config_file=" + self.ConfigFilePath.value)
        commandArgs.append("--project=" + self.ProjectFilePath.value)
        # Quote the serialized roi so the shell treats it as a single argument.
        commandArgs.append('--_node_work_="' + Roi.dumps(taskInfo.subregion) + '"')
        commandArgs.append("--process_name={}".format(taskName))
        commandArgs.append("--output_description_file={}".format(self.OutputDatasetDescription.value))
        for slot in self.SecondaryOutputDescriptions:
            commandArgs.append("--secondary_output_description_file={}".format(slot.value))

        # Check the command format string: We need to know where to put our args...
        commandFormat = self._config.command_format
        assert commandFormat.find("{task_args}") != -1

        # Output log directory might be a relative path (relative to config file)
        absLogDir, _ = getPathVariants(self._config.output_log_directory,
                                       os.path.split(self.ConfigFilePath.value)[0])
        taskOutputLogFilename = taskName + ".log"
        taskOutputLogPath = os.path.join(absLogDir, taskOutputLogFilename)

        allArgs = " " + " ".join(commandArgs) + " "
        taskInfo.taskName = taskName
        taskInfo.command = commandFormat.format(task_args=allArgs,
                                                task_name=taskName,
                                                task_output_file=taskOutputLogPath)
        taskInfos[roi] = taskInfo

    return taskInfos
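# A hypothetical example of a command_format value from the cluster config
# file. The real string is site-specific; the only hard requirement (asserted
# in _prepareTaskInfos above) is that it contains "{task_args}", while
# "{task_name}" and "{task_output_file}" are optional placeholders filled by
# the same .format() call. The qsub invocation below is an illustration, not
# taken from any real config:
#
#   command_format = "qsub -N {task_name} -o {task_output_file} ./run_task.sh {task_args}"
#
# For task "J01" this would expand to roughly:
#
#   qsub -N J01 -o <logdir>/J01.log ./run_task.sh  --option_config_file=... --project=... ...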