Example #1
0
def test_load_objects():
    """Load a labels PNG as objects via LoadData and verify its measurements.

    Writes a deterministic random uint8 label matrix to a temporary PNG,
    references it from a one-row CSV through the Objects_FileName /
    Objects_PathName columns, runs the module, and checks that the loaded
    segmentation and the per-object measurements match.
    """
    rng = numpy.random.RandomState()
    rng.seed(1101)  # fixed seed -> reproducible label matrix
    label_matrix = rng.randint(0, 10, size=(30, 20)).astype(numpy.uint8)
    fd, png_name = tempfile.mkstemp(".png")
    bioformats.write_image(png_name, label_matrix, bioformats.PT_UINT8)
    os.close(fd)
    png_path, png_file = os.path.split(png_name)
    sbs_dir = os.path.join(
        tests.modules.example_images_directory(), "ExampleSBSImages")
    # Header row + single data row: objects columns plus a regular image
    # ("DNA") column.
    csv_fields = (
        cellprofiler.measurement.C_OBJECTS_FILE_NAME,
        OBJECTS_NAME,
        cellprofiler.measurement.C_OBJECTS_PATH_NAME,
        OBJECTS_NAME,
        cellprofiler.measurement.C_FILE_NAME,
        cellprofiler.measurement.C_PATH_NAME,
        png_file,
        png_path,
        sbs_dir,
    )
    csv_text = """%s_%s,%s_%s,%s_DNA,%s_DNA
%s,%s,Channel2-01-A-01.tif,%s
""" % csv_fields
    pipeline, module, csv_name = make_pipeline(csv_text)
    assert isinstance(pipeline, cellprofiler.pipeline.Pipeline)
    assert isinstance(module, cellprofiler.modules.loaddata.LoadData)
    module.wants_images.value = True
    try:
        image_set_list = cellprofiler.image.ImageSetList()
        measurements = cellprofiler.measurement.Measurements()
        workspace = cellprofiler.workspace.Workspace(
            pipeline, module, None, None, measurements, image_set_list)
        pipeline.prepare_run(workspace)
        key_names, groupings = pipeline.get_groupings(workspace)
        assert len(groupings) == 1
        group_keys, image_numbers = groupings[0]
        module.prepare_group(workspace, group_keys, image_numbers)
        # Image numbers are 1-based; image set indices are 0-based.
        image_set = image_set_list.get_image_set(image_numbers[0] - 1)
        object_set = cellprofiler.object.ObjectSet()
        workspace = cellprofiler.workspace.Workspace(
            pipeline, module, image_set, object_set, measurements,
            image_set_list)
        module.run(workspace)
        objects = object_set.get_objects(OBJECTS_NAME)
        # The loaded segmentation must equal the labels written to the PNG.
        assert numpy.all(objects.segmented == label_matrix)
        object_count = measurements.get_current_image_measurement(
            cellprofiler.measurement.FF_COUNT % OBJECTS_NAME)
        assert object_count == 9
        per_object_features = (
            cellprofiler.measurement.M_LOCATION_CENTER_X,
            cellprofiler.measurement.M_LOCATION_CENTER_Y,
            cellprofiler.measurement.M_NUMBER_OBJECT_NUMBER,
        )
        for feature in per_object_features:
            values = measurements.get_current_measurement(
                OBJECTS_NAME, feature)
            assert len(values) == 9  # one value per object
    finally:
        # Clean up the reader cache and the temporary files even on failure.
        bioformats.formatreader.clear_image_reader_cache()
        os.remove(png_name)
        os.remove(csv_name)
Example #2
0
def test_group_on_two():
    """Grouping on Plate and Site yields one group per (plate, site) pair."""
    grayscale = (
        cellprofiler.pipeline.Pipeline.ImageSetChannelDescriptor.CT_GRAYSCALE
    )
    groups, workspace = make_image_sets(
        (
            ("Plate", ("P-12345", "P-23456")),
            ("Well", ("A01", "A02", "A03")),
            ("Site", ("1", "2", "3", "4")),
        ),
        (
            ("DNA", "Wavelength", "1", grayscale),
            ("GFP", "Wavelength", "1", grayscale),
        ),
    )
    groups.wants_groups.value = True
    groups.grouping_metadata[0].metadata_choice.value = "Plate"
    groups.add_grouping_metadata()
    groups.grouping_metadata[1].metadata_choice.value = "Site"
    assert groups.prepare_run(workspace)
    m = workspace.measurements
    assert isinstance(m, cellprofiler.measurement.Measurements)
    image_numbers = m.get_image_numbers()

    pipeline = workspace.pipeline
    assert isinstance(pipeline, cellprofiler.pipeline.Pipeline)
    key_list, groupings = pipeline.get_groupings(workspace)
    assert len(key_list) == 2
    assert key_list[0] == "Metadata_Plate"
    assert key_list[1] == "Metadata_Site"
    # 2 plates x 4 sites = 8 groups, each spanning the 3 wells.
    assert len(groupings) == 8

    dna_file_feature = "_".join((cellprofiler.measurement.C_FILE_NAME, "DNA"))
    expected_pairs = [
        (plate, site)
        for plate in ("P-12345", "P-23456")
        for site in ("1", "2", "3", "4")
    ]
    for (plate, site), (grouping, image_set_list) in zip(
            expected_pairs, groupings):
        assert grouping["Metadata_Plate"] == plate
        assert grouping["Metadata_Site"] == site
        assert len(image_set_list) == 3  # one image set per well
        for image_number in image_set_list:
            file_name = m[cellprofiler.measurement.IMAGE, dna_file_feature,
                          image_number]
            # File names look like "<plate>_<well>_<site>_<rest>".
            plate_part, _, site_part, _ = file_name.split("_")
            assert plate_part == plate
            assert site_part == site
Example #3
0
    def test_11_01_load_objects(self):
        """Load a labels PNG as objects via LoadData and verify measurements.

        Writes a deterministic random uint8 label matrix to a temporary
        PNG, references it from a one-row CSV through Objects_FileName /
        Objects_PathName columns, runs the module, and checks that the
        loaded segmentation equals the written labels and that nine
        objects with per-object measurements were produced.
        """
        r = numpy.random.RandomState()
        r.seed(1101)  # fixed seed -> reproducible label matrix
        labels = r.randint(0, 10, size=(30, 20)).astype(numpy.uint8)
        # Write the labels to a temporary PNG that the CSV will reference.
        handle, name = tempfile.mkstemp(".png")
        bioformats.write_image(name, labels, bioformats.PT_UINT8)
        os.close(handle)
        png_path, png_file = os.path.split(name)
        sbs_dir = os.path.join(tests.modules.example_images_directory(), "ExampleSBSImages")
        # Header row (objects columns plus a regular "DNA" image column)
        # followed by a single data row.
        csv_text = """%s_%s,%s_%s,%s_DNA,%s_DNA
%s,%s,Channel2-01-A-01.tif,%s
""" % (cellprofiler.measurement.C_OBJECTS_FILE_NAME, OBJECTS_NAME,
       cellprofiler.measurement.C_OBJECTS_PATH_NAME, OBJECTS_NAME,
       cellprofiler.measurement.C_FILE_NAME, cellprofiler.measurement.C_PATH_NAME,
       png_file, png_path, sbs_dir)
        pipeline, module, csv_name = self.make_pipeline(csv_text)
        assert isinstance(pipeline, cellprofiler.pipeline.Pipeline)
        assert isinstance(module, cellprofiler.modules.loaddata.LoadData)
        module.wants_images.value = True
        try:
            image_set_list = cellprofiler.image.ImageSetList()
            measurements = cellprofiler.measurement.Measurements()
            workspace = cellprofiler.workspace.Workspace(
                    pipeline, module, None, None, measurements, image_set_list)
            pipeline.prepare_run(workspace)
            key_names, g = pipeline.get_groupings(workspace)
            self.assertEqual(len(g), 1)  # a single group is expected
            module.prepare_group(workspace, g[0][0], g[0][1])
            # Image numbers are 1-based; image set indices are 0-based.
            image_set = image_set_list.get_image_set(g[0][1][0] - 1)
            object_set = cellprofiler.object.ObjectSet()
            workspace = cellprofiler.workspace.Workspace(pipeline, module, image_set,
                                                         object_set, measurements, image_set_list)
            module.run(workspace)
            objects = object_set.get_objects(OBJECTS_NAME)
            # The loaded segmentation must equal the labels written above.
            self.assertTrue(numpy.all(objects.segmented == labels))
            self.assertEqual(measurements.get_current_image_measurement(
                cellprofiler.measurement.FF_COUNT % OBJECTS_NAME), 9)
            for feature in (cellprofiler.measurement.M_LOCATION_CENTER_X,
                            cellprofiler.measurement.M_LOCATION_CENTER_Y,
                            cellprofiler.measurement.M_NUMBER_OBJECT_NUMBER):
                value = measurements.get_current_measurement(
                        OBJECTS_NAME, feature)
                self.assertEqual(len(value), 9)  # one value per object
        finally:
            # Clean up reader cache and temporary files even on failure.
            bioformats.formatreader.clear_image_reader_cache()
            os.remove(name)
            os.remove(csv_name)
Example #4
0
def test_group_on_one():
    """Grouping on "Plate" alone splits 2x3x4 image sets into two groups of 12.

    Builds 24 image sets (2 plates x 3 wells x 4 sites, two grayscale
    channels), groups on Plate, and verifies group numbers/indices,
    the grouping key, and the membership of each group.
    """
    # Fixed: the original created a throwaway Groups() instance here that
    # was immediately overwritten by make_image_sets().
    groups, workspace = make_image_sets(
        (
            ("Plate", ("P-12345", "P-23456")),
            ("Well", ("A01", "A02", "A03")),
            ("Site", ("1", "2", "3", "4")),
        ),
        (
            (
                "DNA",
                "Wavelength",
                "1",
                cellprofiler.pipeline.Pipeline.ImageSetChannelDescriptor.CT_GRAYSCALE,
            ),
            (
                "GFP",
                "Wavelength",
                "1",
                cellprofiler.pipeline.Pipeline.ImageSetChannelDescriptor.CT_GRAYSCALE,
            ),
        ),
    )
    groups.wants_groups.value = True
    groups.grouping_metadata[0].metadata_choice.value = "Plate"
    # Assert success, consistent with the other grouping tests.
    assert groups.prepare_run(workspace)
    m = workspace.measurements
    assert isinstance(m, cellprofiler.measurement.Measurements)
    image_numbers = m.get_image_numbers()
    assert len(image_numbers) == 24  # 2 plates * 3 wells * 4 sites
    # Group number: 1 for the first plate's 12 sets, 2 for the second's.
    numpy.testing.assert_array_equal(
        numpy.hstack([numpy.ones(12, int), numpy.ones(12, int) * 2]),
        m[
            cellprofiler.measurement.IMAGE,
            cellprofiler.measurement.GROUP_NUMBER,
            image_numbers,
        ],
    )
    # Group index restarts at 1 within each group.
    numpy.testing.assert_array_equal(
        numpy.hstack([numpy.arange(1, 13)] * 2),
        m[
            cellprofiler.measurement.IMAGE,
            cellprofiler.measurement.GROUP_INDEX,
            image_numbers,
        ],
    )

    pipeline = workspace.pipeline
    assert isinstance(pipeline, cellprofiler.pipeline.Pipeline)
    key_list, groupings = pipeline.get_groupings(workspace)
    assert len(key_list) == 1
    assert key_list[0] == "Metadata_Plate"
    assert len(groupings) == 2

    for group_number, plate, (grouping, image_set_list) in zip(
        (1, 2), ("P-12345", "P-23456"), groupings
    ):
        assert grouping == dict(Metadata_Plate=plate)
        assert len(image_set_list) == 3 * 4
        # Each group holds a contiguous 1-based range of image numbers.
        assert list(image_set_list) == list(
            range((group_number - 1) * 12 + 1, group_number * 12 + 1)
        )
        for image_number in range(1 + (group_number - 1) * 12, 1 + group_number * 12):
            for image_name in ("DNA", "GFP"):
                ftr = "_".join((cellprofiler.measurement.C_FILE_NAME, image_name))
                # File names are prefixed with the plate name.
                assert m[cellprofiler.measurement.IMAGE, ftr, image_number].startswith(
                    plate
                )
Example #5
0
def make_jobs(args):
    """Split a CellProfiler run into LSF batch jobs.

    Loads the pipeline given by ``args.script``, determines the image
    sets to process (grouped by the pipeline's groupings when present,
    otherwise in fixed-size batches), creates an empty
    ``<input_dir>--cluster`` directory tree next to the image data and
    writes one ``job_<i>.sh`` LSF submission script per batch.

    Parameters
    ----------
    args : argparse.Namespace-like object with attributes
        software, script, script_arguments, memory, queue, host_group,
        batch_size, xvfb.

    Returns
    -------
    (job_dir, num_jobs) : the directory containing the job scripts and
        the number of scripts written.
    """
    xvfb = args.xvfb  # NOTE(review): currently unused; kept for parity
    software = args.software
    script = args.script
    script_arguments = args.script_arguments  # NOTE(review): overwritten per job below
    memory = args.memory
    queue = args.queue
    host_group = args.host_group
    num_jobs_max = 1000

    print('')
    print('make_cellprofiler_jobs_LSF:')
    print('')

    #
    # determine files to analyze
    #

    CELLPROFILERDIR = '/g/software/linux/pack/cellprofiler-2.0.11047/lib'
    sys.path.insert(0, CELLPROFILERDIR)
    # CellProfiler is importable only after its lib dir is on sys.path,
    # hence the deferred imports.
    global cellprofiler
    import cellprofiler
    import cellprofiler.pipeline
    import cellprofiler.workspace
    import cellprofiler.measurements
    import cellprofiler.cpimage

    # LOAD PIPELINE
    pipeline_file = script
    if not os.path.isfile(pipeline_file):
        raise Exception("-- ERROR: pipeline file not found")

    cp_plugin_directory = None  # NOTE(review): read but never used here
    if 'CP_PLUGIN_DIRECTORY' in os.environ:
        cp_plugin_directory = os.environ['CP_PLUGIN_DIRECTORY']

    pipeline = cellprofiler.pipeline.Pipeline()

    def error_callback(event, caller):
        # Report pipeline load/run problems instead of failing silently.
        if (isinstance(event, cellprofiler.pipeline.LoadExceptionEvent) or
                isinstance(event, cellprofiler.pipeline.RunExceptionEvent)):
            sys.stdout.write("Handling exception: {}\n".format(event))
            sys.stdout.write(traceback.format_exc())
            # BUGFIX: was "sys.sydout.flush()", an AttributeError on the
            # error path.
            sys.stdout.flush()
    pipeline.add_listener(error_callback)

    # GET NUMBER OF IMAGES AND GROUPS

    # Check whether we have a new or an old CellProfiler: new versions
    # take a single Workspace argument in prepare_run().
    import inspect
    arg_names = inspect.getargspec(pipeline.prepare_run)[0]
    # Guard the index lookup so a short signature cannot raise IndexError.
    if len(arg_names) == 2 or (len(arg_names) > 1 and
                               arg_names[1] == 'workspace'):
        print('New CellProfiler version')
        new_version = True
    else:
        print('Old CellProfiler version')
        new_version = False

    if new_version:
        # This should work for plain pipeline files ...
        try:
            pipeline.load(pipeline_file)
            image_set_list = cellprofiler.cpimage.ImageSetList()
            measurements = cellprofiler.measurements.Measurements()
            workspace = cellprofiler.workspace.Workspace(
                pipeline, None, None, None,
                measurements, image_set_list
            )
            pipeline.prepare_run(workspace)
            grouping_keys, groups = pipeline.get_groupings(workspace)
            pipeline.prepare_group(workspace, groups[0][0], groups[0][1])
            num_sets = image_set_list.count()
        except Exception:
            import traceback
            traceback.print_exc()
            # ... and this should work for files created with the
            # CreateBatchFile module.
            # BUGFIX: this fallback used to sit after an unconditional
            # "raise" and was unreachable dead code.
            try:
                measurements = cellprofiler.measurements.load_measurements(
                    pipeline_file
                )
                print('Obtaining list of image sets...this can take a while...')
                image_set_list = measurements.get_image_numbers()
                grouping_keys = []
                num_sets = len(image_set_list)
            except Exception:
                traceback.print_exc()
                raise Exception('Unable to load pipeline file:', pipeline_file)
    else:
        try:
            pipeline.load(pipeline_file)
        except Exception:
            import traceback
            traceback.print_exc()
            raise Exception('Unable to load pipeline file:', pipeline_file)

        print('Obtaining list of image sets...this can take a while...')
        # Old prepare_run() takes no workspace and returns the image set
        # list (falsy on failure).
        image_set_list = pipeline.prepare_run(None)
        if not image_set_list:
            raise Exception("Failed to prepare running the pipeline")

        grouping_keys, groups = pipeline.get_groupings(image_set_list)
        num_sets = image_set_list.count()

    print("Image sets: {}".format(num_sets))
    if num_sets == 0:
        print('No image sets to process...finished')
        sys.exit(0)

    # GET IMAGE PATH
    # Could also be a directory with image files if one does not use
    # Batch_data.mat.
    input_dir = None
    if input_dir is None:
        loadimage_module_name = 'LoadImages'
        loadimage_module = None
        for module in pipeline.modules():
            if module.module_name == loadimage_module_name:
                loadimage_module = module
                break
        if loadimage_module:
            # The location setting is "<choice>|<path>".
            input_dir = str(loadimage_module.location).partition('|')[2]
            print("Image path: {}".format(input_dir))
        else:
            print('-- WARNING: The LoadImage module is not used in this'
                  ' pipeline. Default input folder is undefined')
            # NOTE(review): input_dir stays None here, so the rstrip()
            # below will fail — confirm pipelines always contain
            # LoadImages, or supply input_dir explicitly.

    # CREATE BATCHES
    jobStartImages = []
    jobEndImages = []

    if len(grouping_keys) > 0:
        print('Using groupings to assign the jobs to {} groups.'.format(
            len(groups)))
        # One job per group; group[1] lists the group's image numbers.
        for group in groups:
            jobStartImages.append(group[1][0])
            jobEndImages.append(group[1][-1])
        print('Starting images:')
        print(jobStartImages)
    else:
        print("No groupings assigned => "
              "images will be randomly assigned to the jobs.")
        if int(args.batch_size) > 0:
            batch_size = int(args.batch_size)
        else:
            # At least 4 image sets per job, at most ~num_jobs_max jobs.
            batch_size = max(4, int(num_sets / float(num_jobs_max)) + 1)
        jobStartImages = range(1, num_sets + 1, batch_size)
        for start_image in jobStartImages:
            jobEndImages.append(start_image + batch_size - 1)
        jobEndImages[-1] = num_sets  # last batch may be short

    #
    # create directories
    #
    input_dir = input_dir.rstrip(os.path.sep)  # drop trailing slash if any
    output_dir = input_dir + '--cluster'
    print('Cluster directory: {}'.format(output_dir))

    log_dir = os.path.join(output_dir, "log")   # job status / stdout / stderr
    job_dir = os.path.join(output_dir, "jobs")  # the actual job scripts

    ensure_empty_dir(output_dir)
    ensure_empty_dir(log_dir)
    ensure_empty_dir(job_dir)

    #
    # write the job files
    #
    for iJob in range(0, len(jobStartImages)):

        # choose the image subset for this job
        start = jobStartImages[iJob]
        end = min(jobEndImages[iJob], num_sets)

        script_name = os.path.join(job_dir, "job_{}.sh".format(iJob + 1))
        # BUGFIX/idiom: open() instead of the deprecated file() builtin.
        script_file = open(script_name, "w")

        # LSF directives: output/error logs, memory request, single host.
        txt = ['#!/bin/bash',
               '#BSUB -oo "{}/job_{}--out.txt"'.format(log_dir, iJob + 1),
               '#BSUB -eo "{}/job_{}--err.txt"'.format(log_dir, iJob + 1),
               '#BSUB -M {}'.format(memory),
               '#BSUB -R select[mem>{}] -R rusage[mem={}]'.format(memory, memory),
               '#BSUB -R span[hosts=1]'
               ]
        script_file.write('\n'.join(txt) + '\n')

        if queue:
            script_file.write('#BSUB -q {}\n'.format(queue))

        if host_group:
            script_file.write('#BSUB -m {}\n'.format(host_group))

        script_file.write('echo "starting job"\n')

        # Marker file so job start can be checked from the outside.
        script_file.write(
            'touch "{}/job_{}--started.touch"\n'.format(log_dir, iJob + 1)
        )

        # do not produce core dumps
        script_file.write('ulimit -c 0\n')

        # Log some host diagnostics at job start.
        txt = ['echo "hostname:"',
               'hostname',
               'echo "number of cores:"',
               'nproc',
               'echo "total RAM:"',
               'head -1 /proc/meminfo'
               ]
        script_file.write('\n'.join(txt) + '\n')

        # The actual CellProfiler command; -f/-l select this job's
        # first/last image set.
        job_arguments = "-f {} -l {}".format(start, end)
        cmd = ' '.join([
            software,
            "-c -b -r",
            "-p {}".format(script),
            job_arguments,
        ])
        script_file.write(cmd + '\n')

        script_file.write('echo "job finished"\n')
        script_file.close()

        # make the script executable
        os.system('chmod a+x "{}"'.format(script_name))

    return job_dir, len(jobStartImages)