コード例 #1
0
def create_network():
    """Build the elastix/transformix test network and return it."""
    # fastr is imported lazily so the module loads without it installed.
    import fastr

    network = fastr.Network(id_="elastix_test")

    # Sources feeding the registration: fixed image, moving image and the
    # elastix parameter file.
    fixed_source = network.create_source('ITKImageFile', id_='fixed_img')
    moving_source = network.create_source('ITKImageFile', id_='moving_img')
    parameter_source = network.create_source('ElastixParameterFile', id_='param_file')

    # Registration node estimating the transform from moving to fixed.
    elastix = network.create_node('elastix_dev', id_='elastix')
    elastix.inputs['fixed_image'] = fixed_source.output
    elastix.inputs['moving_image'] = moving_source.output
    parameter_link = network.create_link(parameter_source.output,
                                         elastix.inputs['parameters'])
    parameter_link.converge = 0

    # Sink persisting the resulting transform file.
    transform_sink = network.create_sink('ElastixTransformFile', id_='sink_trans')
    transform_sink.inputs['input'] = elastix.outputs['transform']

    # Apply the final transform (last element) to the moving image.
    transformix = network.create_node('transformix_dev', id_='transformix')
    transformix.inputs['image'] = moving_source.output
    transformix.inputs['transform'] = elastix.outputs['transform'][-1]

    # Sink persisting the resampled image.
    image_sink = network.create_sink('ITKImageFile', id_='sink_image')
    image_sink.inputs['input'] = transformix.outputs['image']

    # Emit an SVG drawing and a JSON dump of the network for inspection.
    network.draw_network(img_format='svg')
    network.dumpf('{}.json'.format(network.id), indent=2)

    return network
コード例 #2
0
def create_network():
    """Build the Segmentix test network and return it."""
    # fastr is imported lazily so the module loads without it installed.
    import fastr

    # Fresh network for the Segmentix test.
    network = fastr.Network(id_='Segmentix_test')

    # Source nodes supplying the input segmentation, the mask and the
    # parameter file.
    seg_source = network.create_source('ITKImageFile', id_='segmentation_in')
    mask_source = network.create_source('ITKImageFile', id_='mask')
    param_source = network.create_source('ParameterFile', id_='parameters')

    # Processing node taken from the toollist.
    segmentix = network.create_node('Segmentix', id_="segmentix")

    # Wire every source output to the matching Segmentix input.
    segmentix.inputs['segmentation_in'] = seg_source.output
    segmentix.inputs['mask'] = mask_source.output
    segmentix.inputs['parameters'] = param_source.output

    # Sink storing the processed segmentation.
    seg_sink = network.create_sink('ITKImageFile', id_='segmentation_out')
    seg_sink.input = segmentix.outputs['segmentation_out']

    return network
コード例 #3
0
def create_network():
    """Build the CalcFeatures test network and return it."""
    # fastr is imported lazily so the module loads without it installed.
    import fastr

    # New network for the feature-calculation test.
    network = fastr.Network(id_='CalcFeatures_test')

    # Sources: segmentation, image, DICOM metadata and the parameter file.
    seg_source = network.create_source('ITKImageFile', id_='segmentation')
    img_source = network.create_source('ITKImageFile', id_='image')
    meta_source = network.create_source('DicomImageFile', id_='metadata')
    param_source = network.create_source('ParameterFile', id_='parameters')

    # Feature-extraction node from the toollist.
    calcfeatures = network.create_node('CalcFeatures', id_="calcfeatures")

    # Connect each source output to the corresponding node input.
    calcfeatures.inputs['segmentation'] = seg_source.output
    calcfeatures.inputs['image'] = img_source.output
    calcfeatures.inputs['metadata'] = meta_source.output
    calcfeatures.inputs['parameters'] = param_source.output

    # Sink storing the computed features as HDF5.
    feature_sink = network.create_sink('HDF5', id_='features')
    feature_sink.input = calcfeatures.outputs['features']

    return network
コード例 #4
0
    def create_network(self):
        """Build the transformix network: moving image + transform map -> resampled image."""
        self.network = fastr.Network(id_="transformix")

        # Sources for the image to resample and the transform parameter map.
        self.MovingImageSource = self.network.create_source(
            'ITKImageFile', id_='MovingImage')
        self.ParameterMapSource = self.network.create_source(
            'ElastixTransformFile', id_='ParameterFile')

        # Node applying the transform to the moving image.
        self.transformix_node = self.network.create_node(
            'transformix_dev', id_='transformix')
        self.transformix_node.inputs['image'] = self.MovingImageSource.output
        self.transformix_node.inputs['transform'] = \
            self.ParameterMapSource.output

        # Sink for the transformed image.
        self.outimage = self.network.create_sink(
            'ITKImageFile', id_='sink_image')
        self.outimage.inputs['input'] = self.transformix_node.outputs['image']

        # Emit an SVG drawing and a JSON dump of the network for inspection.
        self.network.draw_network(img_format='svg')
        self.network.dumpf('{}.json'.format(self.network.id), indent=2)
コード例 #5
0
    def __init__(self,
                 label_type,
                 ensemble=50,
                 scores='percentages',
                 network=None,
                 features=None,
                 fastr_plugin='ProcessPoolExecution',
                 name='Example'):
        '''
        Build a network that evaluates the performance of an estimator.

        Parameters
        ----------

        network: fastr network, default None
                If you input a network, the evaluate network is added
                to the existing network.

        '''
        if network is None:
            # Stand-alone mode: create a fresh fastr network plus the
            # temporary directory it will execute in.
            self.mode = 'StandAlone'
            self.fastr_plugin = fastr_plugin
            self.name = 'WORC_Evaluate_' + name
            self.network = fastr.Network(id_=self.name)
            self.fastr_tmpdir = os.path.join(fastr.config.mounts['tmp'],
                                             self.name)
        else:
            # Attach the evaluate network to the supplied WORC network.
            self.network = network
            self.mode = 'WORC'

        # Stand-alone operation cannot proceed without input features.
        if features is None and self.mode == 'StandAlone':
            raise WORCexceptions.IOError(
                'Either features as input or a WORC network is required for the Evaluate network.'
            )

        self.features = features

        self.label_type = label_type
        self.ensemble = ensemble

        self.create_network()
コード例 #6
0
ファイル: Slicer.py プロジェクト: Sikerdebaard/PREDICTFastr
    def __init__(self,
                 images=None,
                 segmentations=None,
                 network=None,
                 fastr_plugin='ProcessPoolExecution',
                 name='Example'):
        '''
        Build a network that slices the given images/segmentations.

        Parameters
        ----------

        network: fastr network, default None
                If you input a network, the slicer network is added
                to the existing network.

        '''
        if network is not None:
            # Attach to the supplied WORC network.
            self.network = network
            self.mode = 'WORC'
        else:
            # Stand-alone mode: create a fresh fastr network plus the
            # temporary directory it will execute in.
            self.mode = 'StandAlone'
            self.fastr_plugin = fastr_plugin
            self.name = 'WORC_Slicer_' + name
            self.network = fastr.Network(id_=self.name)
            self.fastr_tmpdir = os.path.join(fastr.config.mounts['tmp'],
                                             self.name)

        if images is None and self.mode == 'StandAlone':
            # BUG FIX: the original concatenated 'a WORC' + 'network' without
            # a separating space, producing "...a WORCnetwork is required...".
            message = ('Either images and segmentations as input or a WORC '
                       'network is required for the Evaluate network.')
            raise WORCexceptions.IOError(message)

        self.image = images
        self.segmentations = segmentations

        self.create_network()
コード例 #7
0
def main():
    """Cross-validated multi-atlas segmentation of hip/femur MRI volumes.

    Per fold: a similarity-transform multi-atlas registration (Elastix +
    Transformix + majority-vote label fusion) generates a target ROI mask,
    the MRI volumes are N4 bias-corrected and range-matched, and a second
    multi-atlas registration plus VOTE label fusion produces the
    segmentation, which is scored against the target labels with a
    multi-label Dice overlap. Results are written via fastr sinks.
    """
    #################################################
    #### PARAMETERS #################################
    #################################################

    # network name
    network_name = 'multiatlas_femur_segm_CV'

    output_segm_name = 'segm_woapp_hip_foldnr'

    output_eval_meas_name = 'dice_woapp_hip_foldnr'

    # output folder segmentations
    output_segm_folder = 'vfs://fastr_data/hipdata/output/'

    # output folder evaluation measure
    output_eval_meas_folder = 'vfs://fastr_data/hipdata/output/'

    # number of cross-validation folds
    num_folds = 5
    # starting fold
    foldnr = 1

    nrclasses = 2
    # radius for dilation mask
    radius = [5.0]

    # registration parameter files
    registration_parameters = ('vfs://elastix_files/par_affine_multi.txt',
                               'vfs://elastix_files/par_bspline5mm_multi.txt')
    # NOTE(review): mount below says 'elastix_file' (singular) while the
    # parameters above use 'elastix_files' -- possible typo, confirm.
    registration_parameters_generate_mask = (
        'vfs://elastix_file/par_similarity.txt', )

    # MRI volume names
    CV_img = [
        'imageR112621fw', 'imageR112629fw', 'imageR112657fw', 'imageR113297fw',
        'imageR115510fw', 'imageR118132fw', 'imageR118663fw', 'imageR118972fw',
        'imageR119833fw', 'imageR119927fw', 'imageR128348fw', 'imageR129317fw',
        'imageR129358fw', 'imageR131044fw', 'imageR131489fw', 'imageR131717fw',
        'imageR132132fw'
    ]

    # label volume names
    CV_label = [
        'maskR112621', 'maskR112629', 'maskR112657', 'maskR113297',
        'maskR115510', 'maskR118132', 'maskR118663', 'maskR118972',
        'maskR119833', 'maskR119927', 'maskR128348', 'maskR129317',
        'maskR129358', 'maskR131044', 'maskR131489', 'maskR131717',
        'maskR132132'
    ]

    # Region of interest volume
    CV_ROI = [
        'ROI112621', 'ROI112629', 'ROI112657', 'ROI113297', 'ROI115510',
        'ROI118132', 'ROI118663', 'ROI118972', 'ROI119833', 'ROI119927',
        'ROI128348', 'ROI129317', 'ROI129358', 'ROI131044', 'ROI131489',
        'ROI131717', 'ROI132132'
    ]

    # scikit-learn pre-0.18 cross-validation API (cross_validation.KFold);
    # random_state=0 makes the fold assignment reproducible.
    cv = cross_validation.KFold(len(CV_img), n_folds=num_folds, random_state=0)

    # sourcedata dictionary contains path of all data volumes as well as path of
    # appearance model classifier : 'classifier'
    sourcedata = {
        'scalespace_img': {
            'imageR112621fw':
            ('vfs://fastr_data/hipdata/images/R112621f.nii.gz', ),  #1
            'imageR112629fw':
            ('vfs://fastr_data/hipdata/images/R112629f.nii.gz', ),  #2
            'imageR112657fw':
            ('vfs://fastr_data/hipdata/images/R112657f.nii.gz', ),  #3
            'imageR113297fw':
            ('vfs://fastr_data/hipdata/images/R113297f.nii.gz', ),  #4
            'imageR115510fw':
            ('vfs://fastr_data/hipdata/images/R115510f.nii.gz', ),  #5
            'imageR118132fw':
            ('vfs://fastr_data/hipdata/images/R118132f.nii.gz', ),  #6
            'imageR118663fw':
            ('vfs://fastr_data/hipdata/images/R118663f.nii.gz', ),  #7
            'imageR118972fw':
            ('vfs://fastr_data/hipdata/images/R118972f.nii.gz', ),  #8
            'imageR119833fw':
            ('vfs://fastr_data/hipdata/images/R119833f.nii.gz', ),  #9
            'imageR119927fw':
            ('vfs://fastr_data/hipdata/images/R119927f.nii.gz', ),  #10
            'imageR128348fw':
            ('vfs://fastr_data/hipdata/images/R128348f.nii.gz', ),  #11
            'imageR129317fw':
            ('vfs://fastr_data/hipdata/images/R129317f.nii.gz', ),  #12
            'imageR129358fw':
            ('vfs://fastr_data/hipdata/images/R129358f.nii.gz', ),  #13
            'imageR131044fw':
            ('vfs://fastr_data/hipdata/images/R131044f.nii.gz', ),  #14
            'imageR131489fw':
            ('vfs://fastr_data/hipdata/images/R131489f.nii.gz', ),  #15
            'imageR131717fw':
            ('vfs://fastr_data/hipdata/images/R131717f.nii.gz', ),  #16
            'imageR132132fw':
            ('vfs://fastr_data/hipdata/images/R132132f.nii.gz', ),  #17
        },
        'atlas_img': {
            'imageR112621fw':
            ('vfs://fastr_data/hipdata/images/R112621f.nii.gz',
             'vfs://fastr_data/hipdata/images/R112621w.nii.gz'),  #1
            'imageR112629fw':
            ('vfs://fastr_data/hipdata/images/R112629f.nii.gz',
             'vfs://fastr_data/hipdata/images/R112629w.nii.gz'),  #2
            'imageR112657fw':
            ('vfs://fastr_data/hipdata/images/R112657f.nii.gz',
             'vfs://fastr_data/hipdata/images/R112657w.nii.gz'),  #3
            'imageR113297fw':
            ('vfs://fastr_data/hipdata/images/R113297f.nii.gz',
             'vfs://fastr_data/hipdata/images/R113297w.nii.gz'),  #4
            'imageR115510fw':
            ('vfs://fastr_data/hipdata/images/R115510f.nii.gz',
             'vfs://fastr_data/hipdata/images/R115510w.nii.gz'),  #5
            'imageR118132fw':
            ('vfs://fastr_data/hipdata/images/R118132f.nii.gz',
             'vfs://fastr_data/hipdata/images/R118132w.nii.gz'),  #6
            'imageR118663fw':
            ('vfs://fastr_data/hipdata/images/R118663f.nii.gz',
             'vfs://fastr_data/hipdata/images/R118663w.nii.gz'),  #7
            'imageR118972fw':
            ('vfs://fastr_data/hipdata/images/R118972f.nii.gz',
             'vfs://fastr_data/hipdata/images/R118972w.nii.gz'),  #8
            'imageR119833fw':
            ('vfs://fastr_data/hipdata/images/R119833f.nii.gz',
             'vfs://fastr_data/hipdata/images/R119833w.nii.gz'),  #9
            'imageR119927fw':
            ('vfs://fastr_data/hipdata/images/R119927f.nii.gz',
             'vfs://fastr_data/hipdata/images/R119927w.nii.gz'),  #10
            # NOTE(review): both paths for #11 end in 'f.nii.gz'; every other
            # pair is (f, w) -- the second likely should be R128348w.nii.gz.
            'imageR128348fw':
            ('vfs://fastr_data/hipdata/images/R128348f.nii.gz',
             'vfs://fastr_data/hipdata/images/R128348f.nii.gz'),  #11
            'imageR129317fw':
            ('vfs://fastr_data/hipdata/images/R129317f.nii.gz',
             'vfs://fastr_data/hipdata/images/R129317w.nii.gz'),  #12
            'imageR129358fw':
            ('vfs://fastr_data/hipdata/images/R129358f.nii.gz',
             'vfs://fastr_data/hipdata/images/R129358w.nii.gz'),  #13
            'imageR131044fw':
            ('vfs://fastr_data/hipdata/images/R131044f.nii.gz',
             'vfs://fastr_data/hipdata/images/R131044w.nii.gz'),  #14
            'imageR131489fw':
            ('vfs://fastr_data/hipdata/images/R131489f.nii.gz',
             'vfs://fastr_data/hipdata/images/R131489w.nii.gz'),  #15
            'imageR131717fw':
            ('vfs://fastr_data/hipdata/images/R131717f.nii.gz',
             'vfs://fastr_data/hipdata/images/R131717w.nii.gz'),  #16
            'imageR132132fw': (
                'vfs://fastr_data/hipdata/images/R132132f.nii.gz',
                'vfs://fastr_data/hipdata/images/R132132w.nii.gz')  #17
        },
        'atlas_labels': {
            'maskR112621':
            ('vfs://fastr_data/hipdata/hip_masks/R112621.nii.gz', ),  #1
            'maskR112629':
            ('vfs://fastr_data/hipdata/hip_masks/R112629.nii.gz', ),  #2
            'maskR112657':
            ('vfs://fastr_data/hipdata/hip_masks/R112657.nii.gz', ),  #3
            'maskR113297':
            ('vfs://fastr_data/hipdata/hip_masks/R113297.nii.gz', ),  #4
            'maskR115510':
            ('vfs://fastr_data/hipdata/hip_masks/R115510.nii.gz', ),  #5
            'maskR118132':
            ('vfs://fastr_data/hipdata/hip_masks/R118132.nii.gz', ),  #6
            'maskR118663':
            ('vfs://fastr_data/hipdata/hip_masks/R118663.nii.gz', ),  #7
            'maskR118972':
            ('vfs://fastr_data/hipdata/hip_masks/R118972.nii.gz', ),  #8
            'maskR119833':
            ('vfs://fastr_data/hipdata/hip_masks/R119833.nii.gz', ),  #9
            'maskR119927':
            ('vfs://fastr_data/hipdata/hip_masks/R119927.nii.gz', ),  #10
            'maskR128348':
            ('vfs://fastr_data/hipdata/hip_masks/R128348.nii.gz', ),  #11
            'maskR129317':
            ('vfs://fastr_data/hipdata/hip_masks/R129317.nii.gz', ),  #12
            'maskR129358':
            ('vfs://fastr_data/hipdata/hip_masks/R129358.nii.gz', ),  #13
            'maskR131044':
            ('vfs://fastr_data/hipdata/hip_masks/R131044.nii.gz', ),  #14
            'maskR131489':
            ('vfs://fastr_data/hipdata/hip_masks/R131489.nii.gz', ),  #15
            'maskR131717':
            ('vfs://fastr_data/hipdata/hip_masks/R131717.nii.gz', ),  #16
            'maskR132132':
            ('vfs://fastr_data/hipdata/hip_masks/R132132.nii.gz', ),  #17
        },
        'atlas_ROI': {
            'ROI112621':
            ('vfs://fastr_data/hipdata/ROI/R112621w.nii.gz', ),  #1
            'ROI112629':
            ('vfs://fastr_data/hipdata/ROI/R112629w.nii.gz', ),  #2
            'ROI112657':
            ('vfs://fastr_data/hipdata/ROI/R112657w.nii.gz', ),  #3
            'ROI113297':
            ('vfs://fastr_data/hipdata/ROI/R113297w.nii.gz', ),  #4
            'ROI115510':
            ('vfs://fastr_data/hipdata/ROI/R115510w.nii.gz', ),  #5
            'ROI118132':
            ('vfs://fastr_data/hipdata/ROI/R118132w.nii.gz', ),  #6
            'ROI118663':
            ('vfs://fastr_data/hipdata/ROI/R118663w.nii.gz', ),  #7
            'ROI118972':
            ('vfs://fastr_data/hipdata/ROI/R118972w.nii.gz', ),  #8
            'ROI119833':
            ('vfs://fastr_data/hipdata/ROI/R119833w.nii.gz', ),  #9
            'ROI119927':
            ('vfs://fastr_data/hipdata/ROI/R119927w.nii.gz', ),  #10
            'ROI128348':
            ('vfs://fastr_data/hipdata/ROI/R128348w.nii.gz', ),  #11
            'ROI129317':
            ('vfs://fastr_data/hipdata/ROI/R129317w.nii.gz', ),  #12
            'ROI129358':
            ('vfs://fastr_data/hipdata/ROI/R129358w.nii.gz', ),  #13
            'ROI131044':
            ('vfs://fastr_data/hipdata/ROI/R131044w.nii.gz', ),  #14
            'ROI131489':
            ('vfs://fastr_data/hipdata/ROI/R131489w.nii.gz', ),  #15
            'ROI131717':
            ('vfs://fastr_data/hipdata/ROI/R131717w.nii.gz', ),  #16
            'ROI132132':
            ('vfs://fastr_data/hipdata/ROI/R132132w.nii.gz', ),  #17
        }
    }
    #############################################################################
    ##################END PARAMETERS
    #############################################################################

    # NOTE(review): the loop structure below looks mangled -- the inner 'for'
    # over cv only (re)initialises sourcedata_fold, so the 'for ii' / 'for kk'
    # loops that follow always use train_indices/test_indices from the *last*
    # split, and every 'while' iteration rebuilds the same fold. The bodies
    # were probably meant to be nested inside the 'for' over cv; confirm
    # against the original script before relying on this code.
    # Start cross-validation
    while foldnr <= num_folds:
        for train_indices, test_indices in cv:
            sourcedata_fold = {}
            sourcedata_fold['atlas_img'] = {}
            sourcedata_fold['atlas_labels'] = {}
            sourcedata_fold['atlas_ROI'] = {}
            sourcedata_fold['target_img'] = {}
            sourcedata_fold['target_labels'] = {}

        # Copy the training samples into the atlas entries for this fold.
        for ii in train_indices:
            sourcedata_fold['atlas_img'][CV_img[ii]] = sourcedata['atlas_img'][
                CV_img[ii]]
            sourcedata_fold['atlas_labels'][
                CV_label[ii]] = sourcedata['atlas_labels'][CV_label[ii]]
            sourcedata_fold['atlas_ROI'][CV_ROI[ii]] = sourcedata['atlas_ROI'][
                CV_ROI[ii]]

        # Copy the test samples into the target entries for this fold.
        for kk in test_indices:
            sourcedata_fold['target_img'][
                CV_img[kk]] = sourcedata['atlas_img'][CV_img[kk]]
            sourcedata_fold['target_labels'][
                CV_label[kk]] = sourcedata['atlas_labels'][CV_label[kk]]

        # Setup Network and sources
        network = fastr.Network(id_=network_name)

        # load MRI target volumes
        source_targetImages = network.create_source('NiftiImageFileCompressed',
                                                    id_='target_img',
                                                    nodegroup='target')
        source_targetlabel = network.create_source('NiftiImageFileCompressed',
                                                   id_='target_labels',
                                                   nodegroup='target')

        # load MRI atlas volumes
        source_atlasImages = network.create_source('NiftiImageFileCompressed',
                                                   id_='atlas_img',
                                                   nodegroup='atlas')
        source_atlasLabels = network.create_source('NiftiImageFileCompressed',
                                                   id_='atlas_labels',
                                                   nodegroup='atlas')
        source_atlasROI = network.create_source(
            datatype=fastr.typelist['ITKImageFile'],
            id_='atlas_ROI',
            nodegroup='atlas')

        ###########################################################################################################
        #Generate target ROI using multi-atlas similarity transform
        ###########################################################################################################

        reg_genmask = network.create_node(fastr.toollist['Elastix', '4.8'],
                                          id_='reg_genmask',
                                          memory='10G')
        reg_genmask.inputs['fixed_image'] = source_targetImages.output
        reg_genmask.inputs['moving_image'] = source_atlasImages.output
        reg_genmask.inputs['moving_image'].input_group = 'atlas'
        reg_genmask.inputs[
            'parameters'] = registration_parameters_generate_mask

        # transform target ROI according to parameters estimated by elastix
        trans_label_genmask = network.create_node('Transformix',
                                                  id_='trans_label_genmask',
                                                  memory='6G')
        link_trans_label_genmask = trans_label_genmask.inputs[
            'image'] << source_atlasROI.output
        trans_label_genmask.inputs['transform'] = reg_genmask.outputs[
            'transform'][-1]

        # combine transformed target ROI - produces hard label decided by majority vote or soft label given as probability
        combine_label_genmask = network.create_node(
            'PxCombineSegmentations', id_='combine_label_genmask')
        link_combine_genmask = network.create_link(
            trans_label_genmask.outputs['image'],
            combine_label_genmask.inputs['images'])
        # Collapse over the atlas dimension so all transformed ROIs are fused.
        link_combine_genmask.collapse = 'atlas'
        combine_label_genmask.inputs['method'] = ['VOTE']
        combine_label_genmask.inputs['number_of_classes'] = [nrclasses]

        # probability values greater than 0.5 are thresholded to 1, others 0.
        threshold = network.create_node('PxThresholdImage',
                                        id_='threshold',
                                        memory='2G')
        threshold.inputs['image'] = combine_label_genmask.outputs[
            'soft_segment'][-1]
        threshold.inputs['upper_threshold'] = [0.5]

        # convert to char datatype (this is required for itktools, node starting with 'Px')
        castconvert = network.create_node('PxCastConvert',
                                          id_='castconvert',
                                          memory='2G')
        castconvert.inputs['image'] = threshold.outputs['image']
        castconvert.inputs['component_type'] = ['char']

        # morphological operation: dilation
        morph = network.create_node('PxMorphology', id_='morph', memory='5G')
        morph.inputs['image'] = castconvert.outputs['image']
        morph.inputs['operation'] = ['dilation']
        morph.inputs['operation_type'] = ['binary']
        morph.inputs['radius'] = radius

        #############################################################################
        # Apply image processing operations to MRI volumes
        ############################################################################

        # Apply n4 non-uniformity correction to MRI atlas volumes
        n4_atlas_im = network.create_node('N4', id_='n4_atlas', memory='15G')
        linkn4atlas = n4_atlas_im.inputs['image'] << source_atlasImages.output
        linkn4atlas.expand = True
        n4_atlas_im.inputs['shrink_factor'] = 4,
        n4_atlas_im.inputs['converge'] = '[150,00001]',
        n4_atlas_im.inputs['bspline_fitting'] = '[50]',

        # Apply n4 non-uniformity correction to MRI target volumes
        n4_target_im = network.create_node('N4', id_='n4_target', memory='15G')
        linkn4target = n4_target_im.inputs[
            'image'] << source_targetImages.output
        linkn4target.expand = True
        n4_target_im.inputs['shrink_factor'] = 4,
        n4_target_im.inputs['converge'] = '[150,00001]',
        n4_target_im.inputs['bspline_fitting'] = '[50]',

        # Range match MRI atlas images
        rama_atlas_im = network.create_node('RangeMatch',
                                            id_='rama_atlas',
                                            memory='15G')
        rama_atlas_im.inputs['image'] = n4_atlas_im.outputs['image']
        link_rama_mask_atlas = rama_atlas_im.inputs[
            'mask'] << source_atlasROI.output

        # Range match MRI target images
        rama_target_im = network.create_node('RangeMatch',
                                             id_='rama_target',
                                             memory='15G')
        rama_target_im.inputs['image'] = n4_target_im.outputs['image']
        link_rama_mask_target = rama_target_im.inputs['mask'] << morph.outputs[
            'image']

        ################################
        # Multi-atlas segmentation part
        ################################

        # perform registration with elastix
        reg_t1 = network.create_node(fastr.toollist['Elastix', '4.8'],
                                     id_='reg_t1',
                                     memory='20G')
        link1 = reg_t1.inputs['fixed_image'] << rama_target_im.outputs['image']
        link1.collapse = "target_img__output"
        link2 = reg_t1.inputs['moving_image'] << rama_atlas_im.outputs['image']
        link2.collapse = "atlas_img__output"

        reg_t1.inputs['moving_image'].input_group = 'atlas'
        reg_t1.inputs['parameters'] = registration_parameters
        reg_t1.inputs['fixed_mask'] = (morph.outputs['image'],
                                       morph.outputs['image'])
        reg_t1.inputs['moving_mask'] = source_atlasROI.output
        reg_t1.inputs['moving_mask'].input_group = 'atlas'

        # transform masks according to registration results
        trans_label = network.create_node('Transformix', id_='trans_label')
        linktrans = trans_label.inputs['image'] << source_atlasLabels.output
        trans_label.inputs['transform'] = reg_t1.outputs['transform'][-1]

        # combine registered masks
        combine_label = network.create_node('PxCombineSegmentations',
                                            id_='combine_label')
        link_combine = network.create_link(trans_label.outputs['image'],
                                           combine_label.inputs['images'])
        link_combine.collapse = 'atlas'
        combine_label.inputs['method'] = ['VOTE']
        combine_label.inputs['number_of_classes'] = [nrclasses]

        # Create sink for segmentation
        out_seg = network.create_sink('NiftiImageFileCompressed',
                                      id_='out_seg')
        #out_seg.input = argmax.outputs['image']
        out_seg.input = combine_label.outputs['hard_segment']

        # dice overlap
        dice_node = network.create_node(fastr.toollist['DiceMultilabelIms'],
                                        id_='dice_multi')
        #dice_node.inputs['image1'] = argmax.outputs['image']
        dice_node.inputs['image1'] = combine_label.outputs['hard_segment']
        dice_node.inputs['image2'] = source_targetlabel.output
        dice_node.inputs['numlabels'] = 1,

        # Create sink for dice overlap score
        outnumber = network.create_sink(datatype=fastr.typelist['Float'],
                                        id_='sink_measure')
        link = network.create_link(dice_node.outputs['output'],
                                   outnumber.input)
        link.collapse = 'target'

        # location of sink files
        sinkdata = {
            'out_seg':
            output_segm_folder + output_segm_name + str(foldnr) +
            '_{sample_id}{ext}',
            'sink_measure':
            output_eval_meas_folder + output_eval_meas_name + str(foldnr) +
            '_{sample_id}_{cardinality}{ext}'
        }

        # print network
        # NOTE: Python 2 print statement; this script cannot run under
        # Python 3 as-is.
        print network.draw_network(img_format='svg', draw_dimension=True)
        fastr.log.info('^^^^^^^^^^^^^ Starting execution client.')

        # execute multi-atlas appearance segmentation
        network.execute(sourcedata_fold, sinkdata, cluster_queue="week")

        foldnr = foldnr + 1
コード例 #8
0
    def _fit(self, X, y, groups, parameter_iterable):
        """Actual fitting,  performing the search over parameters."""

        base_estimator = clone(self.estimator)
        cv = check_cv(self.cv, y, classifier=is_classifier(base_estimator))
        self.scorer_ = check_scoring(self.estimator, scoring=self.scoring)

        X, y, groups = indexable(X, y, groups)
        n_splits = cv.get_n_splits(X, y, groups)
        if self.verbose > 0 and isinstance(parameter_iterable, Sized):
            n_candidates = len(parameter_iterable)
            print("Fitting {0} folds for each of {1} candidates, totalling"
                  " {2} fits".format(n_splits, n_candidates,
                                     n_candidates * n_splits))

        cv_iter = list(cv.split(X, y, groups))

        # Original: joblib
        # out = Parallel(
        #     n_jobs=self.n_jobs, verbose=self.verbose,
        #     pre_dispatch=pre_dispatch
        # )(delayed(_fit_and_score)(clone(base_estimator), X, y, self.scorer_,
        #                           train, test, self.verbose, parameters,
        #                           fit_params=self.fit_params,
        #                           return_train_score=self.return_train_score,
        #                           return_n_test_samples=True,
        #                           return_times=True, return_parameters=True,
        #                           error_score=self.error_score)
        #   for parameters in parameter_iterable
        #   for train, test in cv_iter)

        name = ''.join(
            random.choice(string.ascii_uppercase + string.digits)
            for _ in range(10))
        tempfolder = os.path.join(fastr.config.mounts['tmp'], 'GS', name)
        if not os.path.exists(tempfolder):
            os.makedirs(tempfolder)

        # Create the parameter files
        parameters_temp = dict()
        for num, parameters in enumerate(parameter_iterable):

            parameters["Number"] = str(num)
            parameters_temp[str(num)] = parameters
            # Convert parameter set to json
            # fname = ('settings_{}.json').format(str(num))
            # sourcename = os.path.join(tempfolder, 'parameters', fname)
            # if not os.path.exists(os.path.dirname(sourcename)):
            #     os.makedirs(os.path.dirname(sourcename))
            # with open(sourcename, 'w') as fp:
            #     json.dump(parameters, fp, indent=4)

            # parameter_files_temp[str(num)] = ('vfs://tmp/{}/{}/{}/{}').format('GS',
            #                                                              name,
            #                                                              'parameters',
            #                                                              fname)

        # Split the parameters files in equal parts
        keys = parameters_temp.keys()
        keys = chunks(keys, self.n_jobspercore)
        parameter_files = dict()
        for num, k in enumerate(keys):
            temp_dict = dict()
            for number in k:
                temp_dict[number] = parameters_temp[number]

            fname = ('settings_{}.json').format(str(num))
            sourcename = os.path.join(tempfolder, 'parameters', fname)
            if not os.path.exists(os.path.dirname(sourcename)):
                os.makedirs(os.path.dirname(sourcename))
            with open(sourcename, 'w') as fp:
                json.dump(temp_dict, fp, indent=4)

            parameter_files[str(num)] =\
                ('vfs://tmp/{}/{}/{}/{}').format('GS',
                                                 name,
                                                 'parameters',
                                                 fname)

        # Create test-train splits
        traintest_files = dict()
        # TODO: ugly nummering solution
        num = 0
        for train, test in cv_iter:
            source_labels = ['train', 'test']

            source_data = pd.Series([train, test],
                                    index=source_labels,
                                    name='Train-test data')

            fname = ('traintest_{}.hdf5').format(str(num))
            sourcename = os.path.join(tempfolder, 'traintest', fname)
            if not os.path.exists(os.path.dirname(sourcename)):
                os.makedirs(os.path.dirname(sourcename))
            traintest_files[str(num)] = ('vfs://tmp/{}/{}/{}/{}').format(
                'GS', name, 'traintest', fname)

            sourcelabel = ("Source Data Iteration {}").format(str(num))
            source_data.to_hdf(sourcename, sourcelabel)

            num += 1

        # Create the files containing the estimator and settings
        estimator_labels = [
            'base_estimator', 'X', 'y', 'scorer', 'verbose', 'fit_params',
            'return_train_score', 'return_n_test_samples', 'return_times',
            'return_parameters', 'error_score'
        ]

        estimator_data = pd.Series([
            clone(base_estimator), X, y, self.scorer_, self.verbose,
            self.fit_params, self.return_train_score, True, True, True,
            self.error_score
        ],
                                   index=estimator_labels,
                                   name='estimator Data')
        fname = 'estimatordata.hdf5'
        estimatorname = os.path.join(tempfolder, fname)
        estimator_data.to_hdf(estimatorname, 'Estimator Data')

        estimatordata = ("vfs://tmp/{}/{}/{}").format('GS', name, fname)

        # Create the fastr network
        network = fastr.Network('GridSearch_' + name)
        estimator_data = network.create_source('HDF5', id_='estimator_source')
        traintest_data = network.create_source('HDF5', id_='traintest')
        parameter_data = network.create_source('JsonFile', id_='parameters')
        sink_output = network.create_sink('HDF5', id_='output')

        fitandscore = network.create_node('fitandscore',
                                          memory='8G',
                                          id_='fitandscore')
        fitandscore.inputs['estimatordata'].input_group = 'estimator'
        fitandscore.inputs['traintest'].input_group = 'traintest'
        fitandscore.inputs['parameters'].input_group = 'parameters'

        fitandscore.inputs['estimatordata'] = estimator_data.output
        fitandscore.inputs['traintest'] = traintest_data.output
        fitandscore.inputs['parameters'] = parameter_data.output
        sink_output.input = fitandscore.outputs['fittedestimator']

        source_data = {
            'estimator_source': estimatordata,
            'traintest': traintest_files,
            'parameters': parameter_files
        }
        sink_data = {
            'output':
            ("vfs://tmp/{}/{}/output_{{sample_id}}_{{cardinality}}{{ext}}"
             ).format('GS', name)
        }

        network.execute(source_data,
                        sink_data,
                        tmpdir=os.path.join(tempfolder, 'tmp'))

        # Read in the output data once finished
        # TODO: expanding fastr url is probably a nicer way
        sink_files = glob.glob(
            os.path.join(fastr.config.mounts['tmp'], 'GS', name) +
            '/output*.hdf5')
        save_data = list()
        feature_labels = list()
        scalers = list()
        GroupSel = list()
        VarSel = list()
        SelectModel = list()
        for output in sink_files:
            data = pd.read_hdf(output)
            save_data.extend(list(data['RET']))
            feature_labels.extend(list(data['feature_labels']))
            scalers.extend(list(data['scaler']))
            GroupSel.extend(list(data['GroupSelection']))
            VarSel.extend(list(data['VarSelection']))
            SelectModel.extend(list(data['SelectModel']))

        # Remove the temporary folder used
        shutil.rmtree(tempfolder)

        # if one choose to see train score, "out" will contain train score info
        if self.return_train_score:
            (train_scores, test_scores, test_sample_counts,
             fit_time, score_time, parameters_est, parameters_all) =\
              zip(*save_data)
        else:
            (test_scores, test_sample_counts,
             fit_time, score_time, parameters_est, parameters_all) =\
              zip(*save_data)

        candidate_params_est = parameters_est[::n_splits]
        candidate_params_all = parameters_all[::n_splits]
        GroupSel = GroupSel[::n_splits]
        SelectModel = SelectModel[::n_splits]
        VarSel = VarSel[::n_splits]
        scalers = scalers[::n_splits]
        feature_labels = feature_labels[::n_splits]
        n_candidates = len(candidate_params_est)

        results = dict()

        def _store(key_name, array, weights=None, splits=False, rank=False):
            """A small helper to store the scores/times to the cv_results_"""
            array = np.array(array,
                             dtype=np.float64).reshape(n_candidates, n_splits)
            if splits:
                for split_i in range(n_splits):
                    results["split%d_%s" %
                            (split_i, key_name)] = array[:, split_i]

            array_means = np.average(array, axis=1, weights=weights)
            results['mean_%s' % key_name] = array_means
            # Weighted std is not directly available in numpy
            array_stds = np.sqrt(
                np.average((array - array_means[:, np.newaxis])**2,
                           axis=1,
                           weights=weights))
            results['std_%s' % key_name] = array_stds

            if rank:
                results["rank_%s" % key_name] = np.asarray(rankdata(
                    -array_means, method='min'),
                                                           dtype=np.int32)

        # Computed the (weighted) mean and std for test scores alone
        # NOTE test_sample counts (weights) remain the same for all candidates
        test_sample_counts = np.array(test_sample_counts[:n_splits],
                                      dtype=np.int)

        _store('test_score',
               test_scores,
               splits=True,
               rank=True,
               weights=test_sample_counts if self.iid else None)
        if self.return_train_score:
            _store('train_score', train_scores, splits=True)
        _store('fit_time', fit_time)
        _store('score_time', score_time)

        best_index = np.flatnonzero(results["rank_test_score"] == 1)[0]
        best_parameters_est = candidate_params_est[best_index]
        best_groupsel = GroupSel[best_index]
        best_modelsel = SelectModel[best_index]
        best_varsel = VarSel[best_index]
        best_scaler = scalers[best_index]
        best_featlab = feature_labels[best_index]

        # Use one MaskedArray and mask all the places where the param is not
        # applicable for that candidate. Use defaultdict as each candidate may
        # not contain all the params
        param_results = defaultdict(
            partial(MaskedArray,
                    np.empty(n_candidates, ),
                    mask=True,
                    dtype=object))
        for cand_i, params in enumerate(candidate_params_all):
            for name, value in params.items():
                # An all masked empty array gets created for the key
                # `"param_%s" % name` at the first occurence of `name`.
                # Setting the value at an index also unmasks that index
                param_results["param_%s" % name][cand_i] = value

        # In order to reduce the memory used, we will only save at
        # max 100 parameter settings
        maxlen = min(100, len(candidate_params_all))
        for k in param_results.keys():
            param_results[k] = param_results[k][0:maxlen]
        results.update(param_results)

        # Store a list of param dicts at the key 'params'
        results['params'] = candidate_params_est[0:maxlen]
        results['params_all'] = candidate_params_all[0:maxlen]

        self.best_groupsel = best_groupsel
        self.best_scaler = best_scaler
        self.best_varsel = best_varsel
        self.best_modelsel = best_modelsel
        self.cv_results_ = results
        self.best_index_ = best_index
        self.best_featlab = best_featlab
        self.n_splits_ = n_splits

        if self.refit:
            # fit the best estimator using the entire dataset
            # clone first to work around broken estimators
            best_estimator = clone(base_estimator).set_params(
                **best_parameters_est)

            # Select only the feature values, not the labels
            X = [x[0] for x in X]
            if best_groupsel is not None:
                X = best_groupsel.transform(X)
            if best_modelsel is not None:
                X = best_modelsel.transform(X)
            if best_varsel is not None:
                X = best_varsel.transform(X)
            if best_scaler is not None:
                X = best_scaler.transform(X)

            if y is not None:
                best_estimator.fit(X, y, **self.fit_params)
            else:
                best_estimator.fit(X, **self.fit_params)
            self.best_estimator_ = best_estimator
        return self
def main():
    """Train a single appearance (random-forest voxel) classifier.

    Builds a fastr network that N4-corrects and range-matches atlas MRI
    volumes, extracts Gaussian scale-space features, samples them inside a
    dilated label mask, and trains a random forest on the samples.
    """
    network_name = 'trainSingleAppearanceClassifier'

    path_of_class_dir = 'vfs://fastr_data/seg/class/'
    # name of classifier to be output, fastr will infer the extension {ext}
    name_of_output_classifier = 'Femke_LEFT_trained_classifer{ext}'

    # Scale space scales to extract (in mm)
    scales = (1.0, 1.6, 4.0)
    # Radius for the background sampling mask dilation (mm?)
    radius = [5.0]
    # Number/fraction of samples to take per image, one element per class
    # (class0, class1, etc).
    # If value between [0.0 - 1.0] it is a fraction of the number of samples
    # available in that class.
    # If the value is above 1, it is the number of samples to take; if the
    # available number of samples is lower, it will take all samples.
    nsamples = (1000, 1000)
    # Note: the parameters for the random forest classifier are in the
    # parameter file supplied as source data

    network = fastr.Network(id_=network_name)

    # create source nodes of data and labels
    source_t1 = network.create_source('NiftiImageFileCompressed',
                                      id_='images',
                                      sourcegroup='atlas')
    source_label = network.create_source('NiftiImageFileCompressed',
                                         id_='label_images',
                                         sourcegroup='atlas')
    source_param = network.create_source('KeyValueFile', id_='param_file')
    source_atlasROI = network.create_source('NiftiImageFileCompressed',
                                            id_='ROI',
                                            sourcegroup='atlas')

    # Apply n4 non-uniformity correction to MRI atlas volumes.
    # Trailing commas are deliberate: the inputs take a sequence
    # (single-element tuple) rather than a scalar.
    n4_atlas_im = network.create_node('N4', id_='n4_atlas', memory='15G')
    n4_atlas_im.inputs['image'] = source_t1.output
    n4_atlas_im.inputs['shrink_factor'] = 4,
    n4_atlas_im.inputs['converge'] = '[150,00001]',
    n4_atlas_im.inputs['bspline_fitting'] = '[50]',

    # Range match images
    rama_atlas_im = network.create_node('RangeMatch',
                                        id_='rama_atlas',
                                        memory='15G')
    rama_atlas_im.inputs['image'] = n4_atlas_im.outputs['image']
    rama_atlas_im.inputs['mask'] = source_atlasROI.output

    # convert to char datatype (this is required for itktools, node starting with 'Px')
    pxcastconvert = network.create_node('PxCastConvert', id_='castconvert')
    pxcastconvert.inputs['image'] = source_label.output
    pxcastconvert.inputs['component_type'] = ['char']

    # Create filter image for source data
    scalespacefilter = network.create_node('GaussianScaleSpace',
                                           id_='scalespacefilter',
                                           memory='15G')
    scalespacefilter.inputs['image'] = rama_atlas_im.outputs['image']
    scalespacefilter.inputs['scales'] = scales

    # Prepare mask: dilate the (char) labels to include background context
    morph = network.create_node('PxMorphology', id_='morph', memory='6G')
    morph.inputs['image'] = pxcastconvert.outputs['image']
    morph.inputs['operation'] = ['dilation']
    morph.inputs['operation_type'] = ['binary']
    morph.inputs['radius'] = radius

    # Sample the feature images
    sampler = network.create_node('SampleImage', id_='sampler', memory='15G')
    sampler.inputs['image'] = scalespacefilter.outputs['image']
    sampler.inputs['labels'] = pxcastconvert.outputs['image']
    sampler.inputs['mask'] = morph.outputs['image']
    sampler.inputs['nsamples'] = nsamples

    # Train the classifier, use 8 cores in parallel.
    # collapse = 0 pools all sample files of the group into a single
    # training job instead of one job per sample file.
    classifier = network.create_node('RandomForestTrain',
                                     id_='classifier',
                                     memory='15G',
                                     cores=8)
    link = network.create_link(sampler.outputs['sample_file'],
                               classifier.inputs['samples'])
    link.collapse = 0
    classifier.inputs['parameters'] = source_param.output
    classifier.inputs['number_of_cores'] = (8, )

    # Create sink
    out_classifier = network.create_sink('SKLearnClassifierFile',
                                         id_='out_classifier')
    out_classifier.input = classifier.outputs['classifier']

    # BUG FIX: the sink path previously concatenated path_of_class_dir with
    # itself; it must point to the output classifier filename.
    sinkdata = {'out_classifier': path_of_class_dir + name_of_output_classifier}

    # NOTE(review): sourcedata is referenced below but never defined in this
    # function — the source-data dict (images/label_images/param_file/ROI)
    # must be supplied here; TODO confirm against the original script.
    print(network.draw_network(img_format='svg', draw_dimension=True))
    fastr.log.info('^^^^^^^^^^^^^ Starting execution client.')
    network.execute(sourcedata, sinkdata, cluster_queue="week")
コード例 #10
0
ファイル: Elastix.py プロジェクト: nmhansson/WORC
    def create_network(self, nettype):
        """Build a fastr elastix/transformix registration network.

        Parameters
        ----------
        nettype : str
            'pairwise' registers a moving image onto a fixed image; any
            other value builds the 'group' variant, in which the fixed
            image is registered onto itself and there are no separate
            moving image/mask sources.

        The network registers the image(s), sinks the resulting transform,
        applies it to the image, and additionally transforms a segmentation
        (the 'ToTransform' source) after forcing
        FinalBSplineInterpolationOrder=0 so label values are not
        interpolated.
        """
        if nettype == 'pairwise':
            # Create the network
            self.network = fastr.Network(id_="elastix_pair")

            # Create Sources
            self.FixedImageSource = self.network.create_source(
                'ITKImageFile', id_='FixedImage')
            self.FixedMaskSource = self.network.create_source('ITKImageFile',
                                                              id_='FixedMask')
            self.MovingImageSource = self.network.create_source(
                'ITKImageFile', id_='MovingImage')
            self.MovingMaskSource = self.network.create_source(
                'ITKImageFile', id_='MovingMask')
            self.ToTransformSource = self.network.create_source(
                'ITKImageFile', id_='ToTransform')
            self.ParameterMapSource = self.network.create_source(
                'ElastixParameterFile', id_='ParameterMaps', nodegroup='par')
            # Elastix requires the output folder as a sink
            # self.OutputFolderSource = self.network.create_sink('Directory', id_='Out')

            # Create Elastix node and links
            self.elastix_node = self.network.create_node(self.elastix_toolname,
                                                         id_='elastix')
            self.elastix_node.inputs[
                'fixed_image'] = self.FixedImageSource.output
            self.elastix_node.inputs[
                'fixed_mask'] = self.FixedMaskSource.output
            self.elastix_node.inputs[
                'moving_image'] = self.MovingImageSource.output
            self.elastix_node.inputs[
                'moving_mask'] = self.MovingMaskSource.output
            # self.OutputFolderSource.input = self.elastix_node.outputs['directory']
            # Collapse over the 'par' nodegroup so all parameter maps feed a
            # single (multi-stage) elastix run.
            self.link_param = self.network.create_link(
                self.ParameterMapSource.output,
                self.elastix_node.inputs['parameters'])
            self.link_param.collapse = 'par'

            # Create Sinks
            self.outtrans = self.network.create_sink('ElastixTransformFile',
                                                     id_='sink_trans')
            self.outimage = self.network.create_sink('ITKImageFile',
                                                     id_='sink_image')
            self.outseg = self.network.create_sink('ITKImageFile',
                                                   id_='sink_seg')
            self.outtrans.inputs['input'] = self.elastix_node.outputs[
                'transform']

            # Transform output image with the final-stage transform ([-1])
            self.transformix_node = self.network.create_node(
                self.transformix_toolname, id_='transformix')
            self.transformix_node.inputs[
                'image'] = self.MovingImageSource.output
            self.transformix_node.inputs[
                'transform'] = self.elastix_node.outputs['transform'][-1]
            self.outimage.inputs['input'] = self.transformix_node.outputs[
                'image']

            # First change the FinalBSplineInterpolationOrder to 0 for the
            # segmentation.
            self.changeorder_node = self.network.create_node(
                'EditElastixTransformFile', id_='editelpara')
            # BUG FIX: the 'set' input was never given in this branch, so the
            # interpolation order was never actually changed (now matches the
            # group branch).
            self.changeorder_node.inputs['set'] = [
                "FinalBSplineInterpolationOrder=0"
            ]
            self.link_trans = self.network.create_link(
                self.elastix_node.outputs['transform'][-1],
                self.changeorder_node.inputs['transform'])
            # self.link_trans.converge = 0
            # self.link_trans.collapse = 'FixedImage'
            # self.link_trans.expand = True

            # Copy metadata from image to segmentation as Elastix uses this
            self.copymetadata_node = self.network.create_node(
                'CopyMetadata', id_='copymetadata')
            self.copymetadata_node.inputs[
                'source'] = self.MovingImageSource.output
            self.copymetadata_node.inputs[
                'destination'] = self.ToTransformSource.output

            # Then transform the segmentation
            self.transformix_node_seg = self.network.create_node(
                self.transformix_toolname, id_='transformix_seg')
            self.transformix_node_seg.inputs[
                'image'] = self.copymetadata_node.outputs['output']
            self.transformix_node_seg.inputs[
                'transform'] = self.changeorder_node.outputs['transform'][-1]
            self.outseg.inputs['input'] = self.transformix_node_seg.outputs[
                'image']
        else:
            # Create the network
            self.network = fastr.Network(id_="elastix_group")

            # Create Sources. In the group variant the fixed image doubles
            # as the moving image, so there are no separate moving sources.
            self.FixedImageSource = self.network.create_source(
                'ITKImageFile', id_='FixedImage')
            self.FixedMaskSource = self.network.create_source('ITKImageFile',
                                                              id_='FixedMask')
            self.ToTransformSource = self.network.create_source(
                'ITKImageFile', id_='ToTransform')
            self.ParameterMapSource = self.network.create_source(
                'ElastixParameterFile', id_='ParameterMaps', nodegroup='par')
            # Elastix requires the output folder as a sink
            # self.OutputFolderSource = self.network.create_sink('Directory', id_='Out')

            # Create Elastix node and links
            self.elastix_node = self.network.create_node(self.elastix_toolname,
                                                         id_='elastix')
            self.elastix_node.inputs[
                'fixed_image'] = self.FixedImageSource.output
            self.elastix_node.inputs[
                'fixed_mask'] = self.FixedMaskSource.output
            self.elastix_node.inputs[
                'moving_image'] = self.FixedImageSource.output
            self.elastix_node.inputs[
                'moving_mask'] = self.FixedMaskSource.output
            # self.OutputFolderSource.input = self.elastix_node.outputs['directory']
            self.link_param = self.network.create_link(
                self.ParameterMapSource.output,
                self.elastix_node.inputs['parameters'])
            self.link_param.collapse = 'par'

            # Create Sinks
            self.outtrans = self.network.create_sink('ElastixTransformFile',
                                                     id_='sink_trans')
            self.outimage = self.network.create_sink('ITKImageFile',
                                                     id_='sink_image')
            self.outseg = self.network.create_sink('ITKImageFile',
                                                   id_='sink_seg')
            self.outtrans.inputs['input'] = self.elastix_node.outputs[
                'transform']

            # Transform output image with the final-stage transform ([-1]).
            # BUG FIX: this branch previously referenced
            # self.MovingImageSource, which is never created in the group
            # variant (AttributeError); the fixed image is the moving image
            # here.
            self.transformix_node = self.network.create_node(
                self.transformix_toolname, id_='transformix')
            self.transformix_node.inputs[
                'image'] = self.FixedImageSource.output
            self.transformix_node.inputs[
                'transform'] = self.elastix_node.outputs['transform'][-1]
            self.outimage.inputs['input'] = self.transformix_node.outputs[
                'image']

            # First change the FinalBSplineInterpolationOrder to 0 for the
            # segmentation.
            self.changeorder_node = self.network.create_node(
                'EditElastixTransformFile', id_='editelpara')
            self.changeorder_node.inputs['set'] = [
                "FinalBSplineInterpolationOrder=0"
            ]
            # BUG FIX: the [-1] (final transform stage) belongs on the source
            # output, not on the destination input (now matches the pairwise
            # branch).
            self.link_trans = self.network.create_link(
                self.elastix_node.outputs['transform'][-1],
                self.changeorder_node.inputs['transform'])
            # self.link_trans.converge = 0
            # self.link_trans.collapse = 'FixedImage'
            # self.link_trans.expand = True

            # Copy metadata from image to segmentation as Elastix uses this.
            # BUG FIX: also used the nonexistent MovingImageSource here.
            self.copymetadata_node = self.network.create_node(
                'CopyMetadata', id_='copymetadata')
            self.copymetadata_node.inputs[
                'source'] = self.FixedImageSource.output
            self.copymetadata_node.inputs[
                'destination'] = self.ToTransformSource.output

            # Then transform the segmentation
            self.transformix_node_seg = self.network.create_node(
                self.transformix_toolname, id_='transformix_seg')
            self.transformix_node_seg.inputs[
                'image'] = self.copymetadata_node.outputs['output']
            self.transformix_node_seg.inputs[
                'transform'] = self.changeorder_node.outputs['transform'][-1]
            self.outseg.inputs['input'] = self.transformix_node_seg.outputs[
                'image']
def main():
    #################################################
    #### PARAMETERS #################################
    #################################################

    # network name
    network_name = 'trainAppearanceModelCV'

    mri_folder = 'vfs://fastr_data/OAI_KneeMRI_testdata/Nifti_gz_FemoralCartilage_IMS_crop/'

    label_folder = 'vfs://fastr_data/OAI_KneeMRI_testdata/Nifti_gz_FemoralCartilage_GT/cropped_data/'

    # Note: the parameters for the random forest classifier are in the
    # parameter file supplied as source data in folder:
    param_folder = 'vfs://fastr_data/seg/param/'

    output_appearance_name = 'fastr_voxclass_single_femcart_foldnr'

    output_folder_appearance = 'vfs://fastr_data/seg/class/'

    # Scale space scales to extract (in mm)
    scales = (1.0, 1.6, 4.0)

    # Radius for the background sampling mask dilation (mm?)
    radius = [5.0]

    # Number/fraction of sample to sample per images
    # On element per class (class0, class1, etc)
    # If value between [0.0 - 1.0] it is a fraction of the number of samples
    # available in that class
    # If the value is above 1, it is the number of samples to take, if the
    # available number of samples is lower, it will take all samples.
    nsamples = (1000, 1000)

    # threshold for creation region of interest mask
    mask_threshold_const = 0.5

    # number of cores used in training
    num_cores = 8

    # number of cross-validation folds
    num_folds = 5
    # starting fold
    foldnr = 1

    # dictionary of names of atlas MRIs
    CV_ims = [
        'image9003406_20060322', 'image9007827_20051219',
        'image9200458_20051202', 'image9352437_20050411',
        'image9403165_20060316', 'image9496443_20050811',
        'image9567704_20050505', 'image9047800_20060306',
        'image9056363_20051010', 'image9068453_20060131',
        'image9085290_20051103', 'image9102858_20060210',
        'image9279291_20051025', 'image9352883_20051123',
        'image9357137_20051212', 'image9357383_20050912',
        'image9369649_20060224', 'image9587749_20050707'
    ]

    # dictionary of names of atlas label volumes
    CV_label = [
        'groundtruth9003406_20060322', 'groundtruth9007827_20051219',
        'groundtruth9200458_20051202', 'groundtruth9352437_20050411',
        'groundtruth9403165_20060316', 'groundtruth9496443_20050811',
        'groundtruth9567704_20050505', 'groundtruth9047800_20060306',
        'groundtruth9056363_20051010', 'groundtruth9068453_20060131',
        'groundtruth9085290_20051103', 'groundtruth9102858_20060210',
        'groundtruth9279291_20051025', 'groundtruth9352883_20051123',
        'groundtruth9357137_20051212', 'groundtruth9357383_20050912',
        'groundtruth9369649_20060224', 'groundtruth9587749_20050707'
    ]

    # Create K-Folds cross validation iterator.
    # Provides train/test indices to split data in train test sets. Split dataset into k consecutive folds (without shuffling).
    # Each fold is then used a validation set once while the k - 1 remaining fold form the training set.
    cv = cross_validation.KFold(len(CV_ims), n_folds=num_folds, random_state=0)

    # sourcedata dictionary contains the paths to mri volumes, label volumes and parameters used in training classifier.
    # NOTE that it is important to give the element informative names for debugging, e.g 'image9003406_20060322' as this will
    # us quickly identify faulty data.
    sourcedata = {
        'images': {
            'image9003406_20060322':
            mri_folder +
            '9003406_20060322_SAG_3D_DESS_LEFT_016610899303_FemoralCartilage_ims_crop.nii.gz',
            'image9007827_20051219':
            mri_folder +
            '9007827_20051219_SAG_3D_DESS_LEFT_016610641606_FemoralCartilage_ims_crop.nii.gz',
            'image9047800_20060306':
            mri_folder +
            '9047800_20060306_SAG_3D_DESS_LEFT_016610874403_FemoralCartilage_ims_crop.nii.gz',
            'image9056363_20051010':
            mri_folder +
            '9056363_20051010_SAG_3D_DESS_LEFT_016610100103_FemoralCartilage_ims_crop.nii.gz',
            'image9068453_20060131':
            mri_folder +
            '9068453_20060131_SAG_3D_DESS_LEFT_016610822403_FemoralCartilage_ims_crop.nii.gz',
            'image9085290_20051103':
            mri_folder +
            '9085290_20051103_SAG_3D_DESS_LEFT_016610952703_FemoralCartilage_ims_crop.nii.gz',
            'image9094865_20060209':
            mri_folder +
            '9094865_20060209_SAG_3D_DESS_LEFT_016610837203_FemoralCartilage_ims_crop.nii.gz',
            'image9102858_20060210':
            mri_folder +
            '9102858_20060210_SAG_3D_DESS_LEFT_016610859602_FemoralCartilage_ims_crop.nii.gz',
            'image9200458_20051202':
            mri_folder +
            '9200458_20051202_SAG_3D_DESS_LEFT_016610610903_FemoralCartilage_ims_crop.nii.gz',
            'image9279291_20051025':
            mri_folder +
            '9279291_20051025_SAG_3D_DESS_LEFT_016610219303_FemoralCartilage_ims_crop.nii.gz',
            'image9352437_20050411':
            mri_folder +
            '9352437_20050411_SAG_3D_DESS_LEFT_016610106806_FemoralCartilage_ims_crop.nii.gz',
            'image9352883_20051123':
            mri_folder +
            '9352883_20051123_SAG_3D_DESS_LEFT_016610798103_FemoralCartilage_ims_crop.nii.gz',
            'image9357137_20051212':
            mri_folder +
            '9357137_20051212_SAG_3D_DESS_LEFT_016610629903_FemoralCartilage_ims_crop.nii.gz',
            'image9357383_20050912':
            mri_folder +
            '9357383_20050912_SAG_3D_DESS_LEFT_016610520402_FemoralCartilage_ims_crop.nii.gz',
            'image9369649_20060224':
            mri_folder +
            '9369649_20060224_SAG_3D_DESS_LEFT_016610861903_FemoralCartilage_ims_crop.nii.gz',
            'image9403165_20060316':
            mri_folder +
            '9403165_20060316_SAG_3D_DESS_LEFT_016610900302_FemoralCartilage_ims_crop.nii.gz',
            'image9496443_20050811':
            mri_folder +
            '9496443_20050811_SAG_3D_DESS_LEFT_016610469823_FemoralCartilage_ims_crop.nii.gz',
            'image9567704_20050505':
            mri_folder +
            '9567704_20050505_SAG_3D_DESS_LEFT_016610398706_FemoralCartilage_ims_crop.nii.gz',
            'image9587749_20050707':
            mri_folder +
            '9587749_20050707_SAG_3D_DESS_LEFT_016610415806_FemoralCartilage_ims_crop.nii.gz',
            'image9596610_20050909':
            mri_folder +
            '9596610_20050909_SAG_3D_DESS_LEFT_016610499502_FemoralCartilage_ims_crop.nii.gz',
        },
        'label_images': {
            'groundtruth9003406_20060322':
            label_folder +
            '20060322_SAG_3D_DESS_LEFT_016610899303_FemoralCartilage_GT_crop.nii.gz',
            'groundtruth9007827_20051219':
            label_folder +
            '20051219_SAG_3D_DESS_LEFT_016610641606_FemoralCartilage_GT_crop.nii.gz',
            'groundtruth9094865_20060209':
            label_folder +
            '20060209_SAG_3D_DESS_LEFT_016610837203_FemoralCartilage_GT_crop.nii.gz',
            'groundtruth9200458_20051202':
            label_folder +
            '20051202_SAG_3D_DESS_LEFT_016610610903_FemoralCartilage_GT_crop.nii.gz',
            'groundtruth9352437_20050411':
            label_folder +
            '20050411_SAG_3D_DESS_LEFT_016610106806_FemoralCartilage_GT_crop.nii.gz',
            'groundtruth9403165_20060316':
            label_folder +
            '20060316_SAG_3D_DESS_LEFT_016610900302_FemoralCartilage_GT_crop.nii.gz',
            'groundtruth9496443_20050811':
            label_folder +
            '20050811_SAG_3D_DESS_LEFT_016610469823_FemoralCartilage_GT_crop.nii.gz',
            'groundtruth9567704_20050505':
            label_folder +
            '20050505_SAG_3D_DESS_LEFT_016610398706_FemoralCartilage_GT_crop.nii.gz',
            'groundtruth9047800_20060306':
            label_folder +
            '20060306_SAG_3D_DESS_LEFT_016610874403_FemoralCartilage_GT_crop.nii.gz',
            'groundtruth9056363_20051010':
            label_folder +
            '20051010_SAG_3D_DESS_LEFT_016610100103_FemoralCartilage_GT_crop.nii.gz',
            'groundtruth9068453_20060131':
            label_folder +
            '20060131_SAG_3D_DESS_LEFT_016610822403_FemoralCartilage_GT_crop.nii.gz',
            'groundtruth9085290_20051103':
            label_folder +
            '20051103_SAG_3D_DESS_LEFT_016610952703_FemoralCartilage_GT_crop.nii.gz',
            'groundtruth9102858_20060210':
            label_folder +
            '20060210_SAG_3D_DESS_LEFT_016610859602_FemoralCartilage_GT_crop.nii.gz',
            'groundtruth9279291_20051025':
            label_folder +
            '20051025_SAG_3D_DESS_LEFT_016610219303_FemoralCartilage_GT_crop.nii.gz',
            'groundtruth9352883_20051123':
            label_folder +
            '20051123_SAG_3D_DESS_LEFT_016610798103_FemoralCartilage_GT_crop.nii.gz',
            'groundtruth9357137_20051212':
            label_folder +
            '20051212_SAG_3D_DESS_LEFT_016610629903_FemoralCartilage_GT_crop.nii.gz',
            'groundtruth9357383_20050912':
            label_folder +
            '20050912_SAG_3D_DESS_LEFT_016610520402_FemoralCartilage_GT_crop.nii.gz',
            'groundtruth9369649_20060224':
            label_folder +
            '20060224_SAG_3D_DESS_LEFT_016610861903_FemoralCartilage_GT_crop.nii.gz',
            'groundtruth9587749_20050707':
            label_folder +
            '20050707_SAG_3D_DESS_LEFT_016610415806_FemoralCartilage_GT_crop.nii.gz'
        },
        'param_file': param_folder + 'param_single.ini'
    }

    #############################################################################
    ##################END PARAMETERS
    #############################################################################

    # cross-validation
    for train_indices, test_indices in cv:
        sourcedata_fold = {}
        sourcedata_fold['images'] = {}
        sourcedata_fold['label_images'] = {}
        sourcedata_fold['param_file'] = sourcedata['param_file']

        for ii in train_indices:
            sourcedata_fold['images'][CV_ims[ii]] = sourcedata['images'][
                CV_ims[ii]]
            sourcedata_fold['label_images'][
                CV_label[ii]] = sourcedata['label_images'][CV_label[ii]]

        #instantiate network
        network = fastr.Network(id_=network_name)

        # load MRI volumes
        source_t1 = network.create_source('NiftiImageFileCompressed',
                                          id_='images',
                                          sourcegroup='atlas')
        # load label volumes
        source_label = network.create_source('NiftiImageFileCompressed',
                                             id_='label_images',
                                             sourcegroup='atlas')
        # load configuration file for appearance model
        source_param = network.create_source('ConfigFile', id_='param_file')

        # Create filter image for source data
        scalespacefilter = network.create_node('GaussianScaleSpace',
                                               id_='scalespacefilter',
                                               memory='15G')
        scalespacefilter.inputs['image'] = source_t1.output
        scalespacefilter.inputs['scales'] = scales

        # Prepare mask
        threshold = network.create_node('PxThresholdImage',
                                        id_='threshold',
                                        memory='15G')
        morph = network.create_node('PxMorphology', id_='morph', memory='15G')

        # threshold mask
        threshold.inputs['image'] = source_label.output
        threshold.inputs['upper_threshold'] = [mask_threshold_const]

        # dilate mask
        morph.inputs['image'] = threshold.outputs['image']
        morph.inputs['operation'] = ['dilation']
        morph.inputs['operation_type'] = ['binary']
        morph.inputs['radius'] = radius

        # Sample the feature images
        sampler = network.create_node('SampleImage',
                                      id_='sampler',
                                      memory='15G')
        sampler.inputs['image'] = scalespacefilter.outputs['image']
        sampler.inputs['labels'] = source_label.output
        sampler.inputs['mask'] = morph.outputs['image']
        sampler.inputs['nsamples'] = nsamples

        # Train the classifier, use [num_cores] cores in parallel
        classifier = network.create_node('RandomForestTrain',
                                         id_='classifier',
                                         memory='15G',
                                         cores=8)
        link = network.create_link(sampler.outputs['sample_file'],
                                   classifier.inputs['samples'])
        link.collapse = 0
        classifier.inputs['parameters'] = source_param.output
        classifier.inputs['number_of_cores'] = (num_cores, )

        # Create sink
        out_classifier = network.create_sink('SKLearnClassifierFile',
                                             id_='out_classifier')
        out_classifier.input = classifier.outputs['classifier']

        # location of sink files
        sinkdata = {
            'out_classifier':
            output_folder_appearance + output_appearance_name + str(foldnr) +
            '_{sample_id}{ext}'
        }

        # print network
        print network.draw_network(img_format='svg', draw_dimension=True)
        fastr.log.info('^^^^^^^^^^^^^ Starting execution client.')

        # execute appearance model training
        network.execute(sourcedata_fold, sinkdata)
        # increment fold nr by one
        foldnr = foldnr + 1
コード例 #12
0
ファイル: testGS.py プロジェクト: wouterkessels/PREDICTFastr
def fit(X, y, groups, parameter_iterable):
    """Run a cross-validated grid search over parameter settings via a fastr network.

    Every candidate parameter set and every CV train/test split is serialised
    to disk (JSON / HDF5), fed through a fastr ``fitandscore`` network, and the
    per-fit outputs are read back and aggregated into a scikit-learn style
    ``cv_results_`` dict, from which the best candidate is selected.

    NOTE(review): this is Python 2 code (print statements) and references
    several names that are NOT defined in this scope: ``self``, ``refit``,
    ``iid``, ``base_estimator``. It appears to be a method extracted from a
    scikit-learn ``BaseSearchCV``-style class -- confirm those bindings
    before running this standalone.

    Parameters
    ----------
    X, y : array-like
        Feature matrix and targets to fit on.
    groups : array-like or None
        Group labels, forwarded to the CV splitter.
    parameter_iterable : iterable of dict
        Candidate parameter settings (e.g. a ParameterGrid).

    Returns
    -------
    ``self`` (undefined here -- see NOTE above).
    """
    # Hard-coded search configuration; an SVC with balanced class weights is
    # always used as the estimator, scored with weighted F1 over 2 folds.
    estimator = SVC(class_weight='balanced', probability=True)
    cv = 2
    scoring = 'f1_weighted'
    verbose = True
    fit_params = None
    return_train_score = True
    error_score = 'raise'

    estimator = estimator  # no-op self-assignment (leftover from method extraction)
    # Turn the integer fold count into a concrete CV splitter and resolve the
    # scoring string into a callable scorer for this estimator.
    cv = check_cv(cv, y, classifier=is_classifier(estimator))
    scorer_ = check_scoring(estimator, scoring=scoring)

    X, y, groups = indexable(X, y, groups)
    n_splits = cv.get_n_splits(X, y, groups)
    if verbose > 0 and isinstance(parameter_iterable, Sized):
        n_candidates = len(parameter_iterable)
        print("Fitting {0} folds for each of {1} candidates, totalling"
              " {2} fits".format(n_splits, n_candidates,
                                 n_candidates * n_splits))

    # Materialise the splits once so they can be serialised per fold below
    # (and so the split order is fixed for the candidate/fold bookkeeping).
    cv_iter = list(cv.split(X, y, groups))

    # Original: joblib
    # out = Parallel(
    #     n_jobs=n_jobs, verbose=verbose
    # )(delayed(_fit_and_score)(clone(base_estimator), X, y, scorer_,
    #                           train, test, verbose, parameters,
    #                           fit_params=fit_params,
    #                           return_train_score=return_train_score,
    #                           return_n_test_samples=True,
    #                           return_times=True, return_parameters=True,
    #                           error_score=error_score)
    #   for parameters in parameter_iterable
    #   for train, test in cv_iter)

    # Random 10-character run id; all intermediate files for this search live
    # under <fastr tmp mount>/GS/<name>/ and are addressed via vfs://tmp URLs.
    name = ''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(10))
    tempfolder = os.path.join(fastr.config.mounts['tmp'], 'GS', name)
    if not os.path.exists(tempfolder):
        os.makedirs(tempfolder)

    # Create the parameter files: one JSON file per candidate parameter set.
    # The dict key (stringified index) becomes the fastr sample id.
    parameter_files = dict()
    print parameter_iterable
    for num, parameters in enumerate(parameter_iterable):
        print parameters
        # Tag each candidate with its index so results can be traced back.
        parameters["Number"] = str(num)

        # Convert parameter set to json
        fname = ('settings_{}.json').format(str(num))
        sourcename = os.path.join(tempfolder, 'parameters', fname)
        if not os.path.exists(os.path.dirname(sourcename)):
            os.makedirs(os.path.dirname(sourcename))

        with open(sourcename, 'w') as fp:
            json.dump(parameters, fp, indent=4)

        # fastr source data is given as vfs:// URLs, not plain paths.
        parameter_files[str(num)] = ('vfs://tmp/{}/{}/{}/{}').format('GS',
                                                                     name,
                                                                     'parameters',
                                                                     fname)

    # Create test-train splits: one HDF5 file per CV fold holding the train
    # and test index arrays as a pandas Series.
    traintest_files = dict()
    # TODO: ugly nummering solution
    num = 0
    for train, test in cv_iter:
        source_labels = ['train', 'test']

        source_data = pd.Series([train, test],
                                index=source_labels,
                                name='Train-test data')

        fname = ('traintest_{}.hdf5').format(str(num))
        sourcename = os.path.join(tempfolder, 'traintest', fname)
        if not os.path.exists(os.path.dirname(sourcename)):
            os.makedirs(os.path.dirname(sourcename))

        traintest_files[str(num)] = ('vfs://tmp/{}/{}/{}/{}').format('GS',
                                                                     name,
                                                                     'traintest',
                                                                     fname)

        sourcelabel = ("Source Data Iteration {}").format(str(num))
        source_data.to_hdf(sourcename, sourcelabel)

        num += 1

    # Create the files containing the estimator and settings: a single HDF5
    # file shared by all fits, bundling the estimator object plus the
    # _fit_and_score-style keyword arguments.
    estimator_labels = ['base_estimator', 'X', 'y', 'scorer',
                        'verbose', 'fit_params', 'return_train_score',
                        'return_n_test_samples',
                        'return_times', 'return_parameters',
                        'error_score']

    estimator_data = pd.Series([estimator, X, y, scorer_,
                                verbose,
                                fit_params, return_train_score,
                                True, True, True,
                                error_score],
                               index=estimator_labels,
                               name='estimator Data')
    fname = 'estimatordata.hdf5'
    estimatorname = os.path.join(tempfolder, fname)
    estimator_data.to_hdf(estimatorname, 'Estimator Data')

    estimatordata = ("vfs://tmp/{}/{}/{}").format('GS', name, fname)

    # Create the fastr network: three sources (estimator, splits, parameters)
    # feeding one fitandscore node, with its output collected by a sink.
    network = fastr.Network('GridSearch_' + name)
    estimator_data = network.create_source('HDF5', id_='estimator_source')
    traintest_data = network.create_source('HDF5', id_='traintest')
    parameter_data = network.create_source('JsonFile', id_='parameters')
    sink_output = network.create_sink('HDF5', id_='output')

    fitandscore = network.create_node('fitandscore', memory='2G', id_='fitandscore')
    # Separate input groups so fastr takes the cross product of the three
    # sources: every (estimator, fold, candidate) combination becomes one job.
    fitandscore.inputs['estimatordata'].input_group = 'estimator'
    fitandscore.inputs['traintest'].input_group = 'traintest'
    fitandscore.inputs['parameters'].input_group = 'parameters'

    fitandscore.inputs['estimatordata'] = estimator_data.output
    fitandscore.inputs['traintest'] = traintest_data.output
    fitandscore.inputs['parameters'] = parameter_data.output
    sink_output.input = fitandscore.outputs['fittedestimator']

    source_data = {'estimator_source': estimatordata,
                   'traintest': traintest_files,
                   'parameters': parameter_files}
    sink_data = {'output': ("vfs://tmp/{}/{}/output_{{sample_id}}_{{cardinality}}{{ext}}").format('GS', name)}

    network.draw_network(network.id, draw_dimension=True)
    print source_data
    # Blocking call: runs all fit-and-score jobs through fastr.
    network.execute(source_data, sink_data, tmpdir=os.path.join(tempfolder, 'fastr'))

    # Read in the output data once finished
    # TODO: expanding fastr url is probably a nicer way
    sink_files = glob.glob(os.path.join(fastr.config.mounts['tmp'],'GS', name) + '/output*.hdf5')
    save_data = list()
    features_labels = list()
    for output in sink_files:
        data = pd.read_hdf(output)

        # 'RET' holds the _fit_and_score-style result tuple for this job.
        # NOTE(review): assumes the output HDF5 has 'RET' and
        # 'feature_labels' entries -- verify against the fitandscore tool.
        temp_save_data = data['RET']

        save_data.append(temp_save_data)
        features_labels.append(data['feature_labels'])

    # if one choose to see train score, "out" will contain train score info
    if return_train_score:
        (train_scores, test_scores, test_sample_counts,
         fit_time, score_time, parameters) = zip(*save_data)
    else:
        (test_scores, test_sample_counts,
         fit_time, score_time, parameters) = zip(*save_data)

    # Results are ordered fold-major per candidate, so every n_splits-th
    # parameters entry is one distinct candidate.
    # NOTE(review): this relies on glob returning the sink files in job
    # order -- glob order is not guaranteed; verify.
    candidate_params = parameters[::n_splits]
    n_candidates = len(candidate_params)

    results = dict()

    def _store(key_name, array, weights=None, splits=False, rank=False):
        """A small helper to store the scores/times to the cv_results_"""
        # Reshape the flat per-fit list into (candidate, split) form.
        array = np.array(array, dtype=np.float64).reshape(n_candidates,
                                                          n_splits)
        if splits:
            for split_i in range(n_splits):
                results["split%d_%s"
                        % (split_i, key_name)] = array[:, split_i]

        array_means = np.average(array, axis=1, weights=weights)
        results['mean_%s' % key_name] = array_means
        # Weighted std is not directly available in numpy
        array_stds = np.sqrt(np.average((array -
                                         array_means[:, np.newaxis]) ** 2,
                                        axis=1, weights=weights))
        results['std_%s' % key_name] = array_stds

        if rank:
            # Rank 1 = best mean score; ties share the minimum rank.
            results["rank_%s" % key_name] = np.asarray(
                rankdata(-array_means, method='min'), dtype=np.int32)

    # Computed the (weighted) mean and std for test scores alone
    # NOTE test_sample counts (weights) remain the same for all candidates
    test_sample_counts = np.array(test_sample_counts[:n_splits],
                                  dtype=np.int)

    # NOTE(review): `iid` is undefined in this scope (see docstring note).
    _store('test_score', test_scores, splits=True, rank=True,
           weights=test_sample_counts if iid else None)
    if return_train_score:
        _store('train_score', train_scores, splits=True)
    _store('fit_time', fit_time)
    _store('score_time', score_time)

    best_index = np.flatnonzero(results["rank_test_score"] == 1)[0]
    best_parameters = candidate_params[best_index]

    # Use one MaskedArray and mask all the places where the param is not
    # applicable for that candidate. Use defaultdict as each candidate may
    # not contain all the params
    param_results = defaultdict(partial(MaskedArray,
                                        np.empty(n_candidates,),
                                        mask=True,
                                        dtype=object))
    for cand_i, params in enumerate(candidate_params):
        for name, value in params.items():
            # An all masked empty array gets created for the key
            # `"param_%s" % name` at the first occurence of `name`.
            # Setting the value at an index also unmasks that index
            param_results["param_%s" % name][cand_i] = value

    results.update(param_results)

    # Store a list of param dicts at the key 'params'
    results['params'] = candidate_params

    # NOTE(review): these locals shadow what were presumably attributes
    # (self.cv_results_ etc.) in the original class -- assigned here but
    # never used or returned.
    cv_results_ = results
    best_index_ = best_index
    n_splits_ = n_splits

    # NOTE(review): `refit` and `base_estimator` are undefined in this scope.
    if refit:
        # fit the best estimator using the entire dataset
        # clone first to work around broken estimators
        best_estimator = clone(base_estimator).set_params(
            **best_parameters)
        if y is not None:
            best_estimator.fit(X, y, **fit_params)
        else:
            best_estimator.fit(X, **fit_params)
        best_estimator_ = best_estimator
    return self