Ejemplo n.º 1
0
    def run_group_request(self, session_id, message_type, message):
        """Handle a run-group request message

        The request carries the pipeline text, a JSON list of
        (channel name, channel metadata) pairs and then one binary frame of
        pixel data per channel. Every channel image must have a leading
        Z or T axis and all channels must have the same number of planes.
        Each plane is run through the pipeline as one image set of a single
        group (group number 1). The numeric measurements are then sent on
        self.socket as a RUN_REPLY_1 multipart message.

        session_id - the session identifier; sent as the first frame of
                     the run reply

        message_type - the type of the request message (not used here)

        message - the list of remaining message frames. Frames are popped
                  off the front of the list as they are consumed.

        Problems with the request are reported through
        raise_cellprofiler_exception / raise_pipeline_exception and the
        method then returns without sending a run reply.
        """
        pipeline = cellprofiler_core.pipeline.Pipeline()
        m = Measurements()
        image_group = m.hdf5_dict.hdf5_file.create_group("ImageData")
        if len(message) < 2:
            self.raise_cellprofiler_exception(session_id,
                                              "Missing run request sections")
            return
        pipeline_txt = message.pop(0).bytes
        image_metadata = message.pop(0).bytes
        n_image_sets = None
        channel_names = []
        try:
            image_metadata = json.loads(image_metadata)
            for channel_name, channel_metadata in image_metadata:
                channel_names.append(channel_name)
                if len(message) < 1:
                    self.raise_cellprofiler_exception(
                        session_id,
                        "Missing binary data for channel %s" % channel_name)
                    # NOTE(review): this path returns a 3-tuple while the
                    # other error paths return None; kept as-is so the
                    # return shape seen by callers does not change.
                    return None, None, None
                pixel_data = self.decode_image(channel_metadata,
                                               message.pop(0).bytes,
                                               grouping_allowed=True)
                if pixel_data.ndim < 3:
                    self.raise_cellprofiler_exception(
                        session_id,
                        "The image for channel %s does not have a Z or T dimension"
                        % channel_name,
                    )
                    return
                if n_image_sets is None:
                    n_image_sets = pixel_data.shape[0]
                elif n_image_sets != pixel_data.shape[0]:
                    self.raise_cellprofiler_exception(
                        session_id,
                        "The images passed have different numbers of Z or T planes",
                    )
                    return
                image_group.create_dataset(channel_name, data=pixel_data)
        except Exception as e:
            # Pass the text of the error, as the pipeline error path does.
            self.raise_cellprofiler_exception(session_id, str(e))
            return None, None, None
        if n_image_sets is None:
            # The metadata listed no channels, so there is nothing to run
            # and the image-set count below could not be computed.
            self.raise_cellprofiler_exception(
                session_id, "No image channels were supplied")
            return
        try:
            # The frame payload is bytes; a text buffer needs str.
            if isinstance(pipeline_txt, bytes):
                pipeline_txt = pipeline_txt.decode("utf-8")
            pipeline.loadtxt(StringIO(pipeline_txt))
        except Exception as e:
            logging.warning(
                "Failed to load pipeline: sending pipeline exception")
            self.raise_pipeline_exception(session_id, str(e))
            return

        # All planes belong to group 1; the group index is the plane number.
        image_numbers = numpy.arange(1, n_image_sets + 1)
        for image_number in image_numbers:
            m["Image", GROUP_NUMBER, image_number, ] = 1
            m["Image", GROUP_INDEX, image_number, ] = image_number
        input_modules, other_modules = self.split_pipeline(pipeline)
        workspace = cellprofiler_core.workspace.Workspace(
            pipeline, None, m, None, m, None)
        logging.info("Preparing group")
        for module in other_modules:
            module.prepare_group(
                workspace,
                # Same single-key dictionary the dict([...]) form produced:
                # later image numbers overwrite earlier ones.
                {"image_number": i for i in image_numbers},
                image_numbers,
            )

        for image_index in range(n_image_sets):
            object_set = cellprofiler_core.object.ObjectSet()
            m.next_image_set(image_index + 1)
            for channel_name in channel_names:
                pixel_data = image_group[channel_name][image_index]
                m.add(channel_name, Image(pixel_data))

            for module in other_modules:
                workspace = cellprofiler_core.workspace.Workspace(
                    pipeline, module, m, object_set, m, None)
                try:
                    logging.info("Running module # %d: %s",
                                 module.module_num, module.module_name)
                    pipeline.run_module(module, workspace)
                    if workspace.disposition in (
                            DISPOSITION_SKIP,
                            DISPOSITION_CANCEL,
                    ):
                        break
                except Exception as e:
                    msg = 'Encountered error while running module, "%s": %s' % (
                        module.module_name,
                        e,
                    )
                    logging.warning(msg)
                    self.raise_cellprofiler_exception(session_id, msg)
                    return
            else:
                # Every module ran without a skip / cancel: next image set.
                continue
            # A module broke out of the loop: a skip only abandons this
            # image set, a cancel abandons the rest of the group.
            if workspace.disposition == DISPOSITION_CANCEL:
                break
        for module in other_modules:
            module.post_group(
                workspace, {"image_number": i for i in image_numbers})
        logging.info("Finished group")

        type_names, feature_dict = self.find_measurements(
            other_modules, pipeline)

        double_features = []
        double_data = []
        float_features = []
        float_data = []
        int_features = []
        int_data = []
        string_features = []
        metadata = [
            double_features, float_features, int_features, string_features
        ]

        for object_name, features in feature_dict.items():
            df = []
            double_features.append((object_name, df))
            ff = []
            float_features.append((object_name, ff))
            intf = []
            int_features.append((object_name, intf))
            # String features are listed per object but never filled in.
            sf = []
            string_features.append((object_name, sf))
            if object_name == "Image":
                # One value per image set for image-wide measurements
                object_counts = [1] * n_image_sets
            else:
                object_numbers = m[object_name, OBJECT_NUMBER, image_numbers, ]
                object_counts = [len(x) for x in object_numbers]
            for feature, data_type in features:
                if data_type == "java.lang.String":
                    continue
                if not m.has_feature(object_name, feature):
                    # int() because numpy.sum of an empty list is a float
                    data = numpy.zeros(int(numpy.sum(object_counts)))
                else:
                    data = m[object_name, feature, image_numbers]
                #
                # Make each image set's values exactly as long as its
                # object count: truncate extra values, zero-pad missing ones
                #
                pieces = []
                for di, count in zip(data, object_counts):
                    if count == 0:
                        continue
                    di = numpy.atleast_1d(di)
                    if len(di) > count:
                        di = di[:count]
                    elif len(di) < count:
                        padded = numpy.zeros(count, di.dtype)
                        padded[:len(di)] = di
                        di = padded
                    pieces.append(di)
                if len(pieces) > 0:
                    data = numpy.hstack(pieces)
                else:
                    # No image set has any object: there are no values
                    data = numpy.zeros(0)

                if type_names[data_type] == "java.lang.Double":
                    df.append((feature, len(data)))
                    if len(data) > 0:
                        double_data.append(data.astype("<f8"))
                elif type_names[data_type] == "java.lang.Float":
                    ff.append((feature, len(data)))
                    if len(data) > 0:
                        float_data.append(data.astype("<f4"))
                elif type_names[data_type] == "java.lang.Integer":
                    intf.append((feature, len(data)))
                    if len(data) > 0:
                        int_data.append(data.astype("<i4"))
        #
        # The payload is the raw little-endian bytes of all doubles, then
        # all floats, then all ints, in the order listed in the metadata
        #
        buffers = [
            numpy.frombuffer(
                numpy.ascontiguousarray(numpy.hstack(ditem)).data, numpy.uint8)
            for ditem in (double_data, float_data, int_data) if len(ditem) > 0
        ]
        if buffers:
            data = numpy.ascontiguousarray(numpy.hstack(buffers))
        else:
            # hstack rejects an empty list; send an empty payload instead
            data = numpy.zeros(0, numpy.uint8)
        self.socket.send_multipart([
            zmq.Frame(session_id),
            zmq.Frame(),
            zmq.Frame(RUN_REPLY_1),
            # Frames hold bytes, not text
            zmq.Frame(json.dumps(metadata).encode("utf-8")),
            zmq.Frame(data),
        ])
Ejemplo n.º 2
0
def get_batch_commands(filename, n_per_job=1):
    """Print the commands needed to run the given batch data file headless

    filename - the name of a Batch_data.h5 file. The file should group image sets.

    n_per_job - the number of image sets to put in each command when the
                only grouping tag is "ImageNumber" (default = 1)

    The output assumes that the executable, "CellProfiler", can be used
    to run the command from the shell. Alternatively, the output could be
    run through a utility such as "sed":

    CellProfiler --get-batch-commands Batch_data.h5 | sed s/CellProfiler/farm_job.sh/

    Three kinds of command lines can be printed:

    * one "-f first -l last" command per group, if the file has more than
      one group and its group numbers / indexes are consecutive
    * one "-f first -l last" command per n_per_job image sets, if the only
      grouping tag is "ImageNumber"
    * one "-g key=value,..." command per metadata grouping otherwise
    """
    path = os.path.expanduser(filename)

    m = Measurements(filename=path, mode="r")

    image_numbers = m.get_image_numbers()

    if m.has_feature(IMAGE, GROUP_NUMBER):
        group_numbers = m[IMAGE, GROUP_NUMBER, image_numbers, ]

        group_indexes = m[IMAGE, GROUP_INDEX, image_numbers, ]

        if numpy.any(group_numbers != 1) and numpy.all(
            (group_indexes[1:] == group_indexes[:-1] + 1)
                | ((group_indexes[1:] == 1)
                   & (group_numbers[1:] == group_numbers[:-1] + 1))):
            #
            # Do -f and -l if more than one group and group numbers
            # and indices are properly constructed
            #
            # NOTE(review): prev + 1 .. off are used as image numbers, which
            # presumes image numbers run 1..N in group order - confirm.
            #
            bins = numpy.bincount(group_numbers)

            prev = 0

            for off in numpy.cumsum(bins):
                # An unchanged cumulative count means that this group
                # number has no image sets
                if off == prev:
                    continue

                print("CellProfiler -c -r -p %s -f %d -l %d" %
                      (filename, prev + 1, off))

                prev = off

            return

    #
    # Either the file has no group features or there is just one group
    # (or the indexes are not consecutive). Fall back on the grouping tags
    # so that commands are printed in these cases too.
    #
    metadata_tags = m.get_grouping_tags()

    if len(metadata_tags) == 1 and metadata_tags[0] == "ImageNumber":
        n_images = len(image_numbers)

        for start in range(0, n_images, n_per_job):
            first = image_numbers[start]
            # The last job may have fewer than n_per_job image sets
            last = image_numbers[min(start + n_per_job, n_images) - 1]
            print("CellProfiler -c -r -p %s -f %d -l %d" %
                  (filename, first, last))
    else:
        # LoadData w/ images grouped by metadata tags
        groupings = m.get_groupings(metadata_tags)

        for grouping in groupings:
            group_string = ",".join(
                ["%s=%s" % (k, v) for k, v in grouping[0].items()])

            print("CellProfiler -c -r -p %s -g %s" %
                  (filename, group_string))