Example #1
def main():
	# catch parameters
	forest_file = sys.argv[1]
	case_folder = sys.argv[2]
	mask_file = sys.argv[3]
	segmentation_file = sys.argv[4]

	# loading case features
	feature_vector = []
	for _file in os.listdir(case_folder):
		if _file.endswith('.npy') and _file.startswith('feature.'):
			with open(os.path.join(case_folder, _file), 'rb') as f:
				feature_vector.append(numpy.load(f))
	feature_vector = join(*feature_vector)
	if 1 == feature_vector.ndim:
		feature_vector = numpy.expand_dims(feature_vector, -1)

	# load and apply the decision forest
	with open(forest_file, 'rb') as f:
		forest = pickle.load(f)
	classification_results = forest.predict(feature_vector)

	# preparing the result image
	m, h = load(mask_file)
	m = m.astype(numpy.bool)
	o = numpy.zeros(m.shape, numpy.uint8)
	o[m] = numpy.squeeze(classification_results).ravel()

	# applying the post-processing morphology
	#o = binary_dilation(o, iterations=2)
	#o = keep_largest_connected_component(o)
	o = binary_fill_holes(o)

	# saving the results
	save(o, segmentation_file, h, True)
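The script reads its parameters straight from sys.argv, so a call looks roughly like the following (script and file names are placeholders; only the argument order is taken from the code above, and the case folder is expected to contain the feature.*.npy files):

python apply_forest.py forest.pkl case01/ case01/mask.nii.gz case01/segmentation.nii.gz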
Example #2
 def test_intensities(self):
     """Test the feature: image intensity."""
     
     # Test 2D image with various settings
     i = numpy.asarray([[-1., 1, 2],
                        [ 0., 2, 4],
                        [ 1., 3, 5]])
     m = [[True, False, False],
          [False, True, False],
          [True, True, False]]
     e = [-1., 1, 2, 0, 2, 4, 1, 3, 5]
     em = [-1., 2., 1., 3.]
     
     r = intensities(i) # normalize = False, mask = slice(None)
     numpy.testing.assert_allclose(r, e, err_msg = 'intensities: 2D, single-spectrum, unmasked and not normalized')
     
     r = intensities(i, mask = m) # normalize = False
     numpy.testing.assert_allclose(r, em, err_msg = 'intensities: 2D, single-spectrum, masked and not normalized')
     
     r = intensities([i, i]) # normalize = False, mask = slice(None)
     numpy.testing.assert_allclose(r, join(e, e), err_msg = 'intensities: 2D, multi-spectrum, unmasked and not normalized')
     
     # Test 3D image
     i = numpy.asarray([i, i + 0.5])
     e = append(e, numpy.asarray(e) + 0.5)
     
     r = intensities(i) # normalize = False, mask = slice(None)
     numpy.testing.assert_allclose(r, e, err_msg = 'intensities: 3D, single-spectrum, unmasked and not normalized')
     
     # Test 4D image
     i = numpy.asarray([i, i + 0.5])
     e = append(e, numpy.asarray(e) + 0.5)
     
     r = intensities(i) # normalize = False, mask = slice(None)
     numpy.testing.assert_allclose(r, e, err_msg = 'intensities: 4D, single-spectrum, unmasked and not normalized')
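The helpers join and append used throughout these examples come from the project's feature utilities and are not shown here. From the way the tests use them, join appears to combine per-voxel feature vectors column-wise, while append concatenates along the sample axis; a minimal sketch of that assumed behaviour (not the library's actual implementation):

import numpy

def join(*feature_vectors):
    # assumed behaviour: column-stack feature vectors, so join(a, b) of two
    # length-n vectors yields an (n, 2) feature matrix
    arrays = [numpy.asarray(fv) for fv in feature_vectors]
    arrays = [a.reshape(-1, 1) if a.ndim == 1 else a for a in arrays]
    return numpy.concatenate(arrays, axis=1)

def append(*feature_vectors):
    # assumed behaviour: concatenate sample rows, so append(e, e) of two
    # length-n vectors yields a length-2n vector
    return numpy.concatenate([numpy.asarray(fv) for fv in feature_vectors], axis=0)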
Example #3
def indices(image, voxelspacing = None, mask = slice(None)):
    """
    Takes an image and returns the voxels' ndim-indices as a voxel-wise feature. The voxel
    spacing is taken into account, i.e. the indices are not array indices, but millimeter
    indices.
    
    This is a multi-element feature where each element corresponds to one of the image's
    axes, e.g. x, y, z, ...
    
    Note that this feature is independent of the actual image content and depends
    solely on its shape. Therefore a one-dimensional feature is always returned, even if
    a multi-spectral image has been supplied.
    
    @param image a single image or a list/tuple of images (for multi-spectral case)
    @type image ndarray | list of ndarrays | tuple of ndarrays
    @param voxelspacing the side-length of each voxel
    @type voxelspacing sequence of floats    
    @param mask a binary mask for the image
    @type mask ndarray

    @return each voxel ndim-index
    @type ndarray
    """
    if type(image) == tuple or type(image) == list:
        image = image[0]
        
    if not type(mask) is slice:
        mask = numpy.array(mask, copy=False, dtype=numpy.bool)
        
    if voxelspacing is None:
        voxelspacing = [1.] * image.ndim

    return join(*[a[mask].ravel() * vs for a, vs in zip(numpy.indices(image.shape), voxelspacing)])
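A small usage illustration of indices, assuming the behaviour defined above (only the shape of the input matters):

import numpy

image = numpy.zeros((2, 2))                      # content is irrelevant, only the shape is used
coords = indices(image, voxelspacing=(1.0, 2.0))
# per axis, the raveled millimetre coordinates are
#   axis 0: [0, 0, 1, 1]   (array index * 1.0 mm)
#   axis 1: [0, 2, 0, 2]   (array index * 2.0 mm)
# joined, this gives one coordinate entry per voxel and per axis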
Example #5
    def _run_interface(self, runtime):
        if not base.isdefined(self.inputs.segmentation_file):
            self.inputs.segmentation_file = self._gen_filename(
                'segmentation_file')
        if not base.isdefined(self.inputs.probability_file):
            self.inputs.probability_file = self._gen_filename(
                'probability_file')

        log.info('Applying RDF {} to features {}'.format(
            self.inputs.classifier_file,
            [os.path.basename(p) for p in self.inputs.feature_files]))

        features = []
        for path in self.inputs.feature_files:
            with open(path, 'rb') as f:
                features.append(numpy.load(f))

        feature_vector = mutil.join(*features)
        if feature_vector.ndim == 1:
            feature_vector = numpy.expand_dims(feature_vector, -1)

        # load and apply the decision forest
        with gzip.open(self.inputs.classifier_file, 'r') as f:
            classifier = pickle.load(f)
            prob_classification = \
                classifier.predict_proba(feature_vector)[:, 1]
            # equivalent to forest.predict
            bin_classification = prob_classification > PROBABILITY_THRESHOLD

        # prepare result images to save to disk
        mask, header = mio.load(self.inputs.mask_file)
        mask = mask.astype(numpy.bool)
        segmentation_image = numpy.zeros(mask.shape, numpy.uint8)
        segmentation_image[mask] = numpy.squeeze(bin_classification).ravel()
        probability_image = numpy.zeros(mask.shape, numpy.float32)
        probability_image[mask] = numpy.squeeze(prob_classification).ravel()

        # apply the post-processing morphology
        segmentation_image = scipy.ndimage.morphology.binary_fill_holes(
            segmentation_image)

        mio.save(segmentation_image,
                 self.inputs.segmentation_file,
                 header,
                 force=True)
        mio.save(probability_image,
                 self.inputs.probability_file,
                 header,
                 force=True)
        return runtime
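The '# equivalent to forest.predict' comment relies on the classifier being a binary forest and PROBABILITY_THRESHOLD sitting at the decision boundary. If the pickled classifier is, say, a scikit-learn RandomForestClassifier (the excerpt does not confirm this) and the threshold is 0.5, the equivalence can be checked directly:

import numpy
from sklearn.ensemble import RandomForestClassifier

# toy data only for the check; 0.5 stands in for PROBABILITY_THRESHOLD
X = numpy.random.rand(200, 4)
y = (X[:, 0] > 0.5).astype(int)
forest = RandomForestClassifier(n_estimators=10, random_state=0).fit(X, y)

prob = forest.predict_proba(X)[:, 1]
assert numpy.array_equal(prob > 0.5, forest.predict(X).astype(bool))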
Example #6
def __extract_feature(fun, image, mask = slice(None), **kwargs):
    """
    Convenience function to cope with multi-spectral images and feature normalization.
    
    @param fun the feature extraction function to call
    @param image the single or multi-spectral image
    @param mask the binary mask to select the voxels for which to extract the feature
    @param kwargs additional keyword arguments to be passed to the feature extraction function 
    """
    if not type(mask) is slice:
        mask = numpy.array(mask, copy=False, dtype=numpy.bool)
    
    if type(image) == tuple or type(image) == list:
        return utilities.join(*[fun(i, mask, **kwargs) for i in image])
    else:
        return fun(image, mask, **kwargs)
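Public feature functions would then typically delegate to this wrapper so that a list or tuple of images (the multi-spectral case) is handled transparently; a sketch, with _intensities_single as a hypothetical single-spectrum extractor that is not part of the excerpt:

def intensities(image, mask = slice(None)):
    # forwards to the wrapper above, which applies the single-spectrum
    # extractor per spectrum and joins the resulting feature vectors
    return __extract_feature(_intensities_single, image, mask)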
Example #7
def main():
	# catch parameters
	forest_file = sys.argv[1]
	case_folder = sys.argv[2]
	mask_file = sys.argv[3]
	feature_cnf_file = sys.argv[4]
	segmentation_file = sys.argv[5]
	probability_file = sys.argv[6]

	# load features to use and create proper names from them
	features_to_use = load_feature_names(feature_cnf_file)

	# loading case features
	feature_vector = []

	for feature_name in features_to_use:
		_file = os.path.join(case_folder, '{}.npy'.format(feature_name))
		if not os.path.isfile(_file):
			raise Exception('The feature "{}" could not be found in folder "{}". Breaking.'.format(feature_name, case_folder))
		with open(_file, 'rb') as f:
			feature_vector.append(numpy.load(f))
	feature_vector = join(*feature_vector)
	if 1 == feature_vector.ndim:
		feature_vector = numpy.expand_dims(feature_vector, -1)

	# load and apply the decision forest
	with open(forest_file, 'rb') as f:
		forest = pickle.load(f)
	probability_results = forest.predict_proba(feature_vector)[:,1]
	classification_results = probability_results > probability_threshold # equivalent to forest.predict

	# preparing the result images
	m, h = load(mask_file)
	m = m.astype(numpy.bool)
	oc = numpy.zeros(m.shape, numpy.uint8)
	op = numpy.zeros(m.shape, numpy.float32)
	oc[m] = numpy.squeeze(classification_results).ravel()
	op[m] = numpy.squeeze(probability_results).ravel()

	# applying the post-processing morphology
	oc = binary_fill_holes(oc)

	# saving the results
	save(oc, segmentation_file, h, True)
	save(op, probability_file, h, True)
Example #8
def __applyforest(forest, featurefiles, brainmaskfile, segmentationfile, probabilityfile):
    r"""Apply a forest using the features and save the results."""
    # memory-efficient loading of the features for this case
    features = join(*[numpy.load(featurefile, mmap_mode='r') for featurefile in featurefiles])
    if 1 == features.ndim:
        features = numpy.expand_dims(features, -1)
    
    # apply forest
    probability_results = forest.predict_proba(features)[:,1]
    classification_results = probability_results > PROBABILITY_THRESHOLD # equivalent to forest.predict
    
    # create result image
    m, h = load(brainmaskfile)
    m = m.astype(numpy.bool)
    oc = numpy.zeros(m.shape, numpy.uint8)
    op = numpy.zeros(m.shape, numpy.float32)
    oc[m] = numpy.squeeze(classification_results).ravel()
    op[m] = numpy.squeeze(probability_results).ravel()

    # saving the results
    save(oc, segmentationfile, h)
    save(op, probabilityfile, h)
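A caller would unpickle the trained forest first and then hand it, together with the per-case files, to this helper; roughly (all paths are placeholders):

import pickle

with open('forest.pkl', 'rb') as f:   # placeholder path to the trained forest
    forest = pickle.load(f)

__applyforest(forest,
              ['case01/feature.intensities.npy', 'case01/feature.indices.npy'],
              'case01/brainmask.nii.gz',
              'case01/segmentation.nii.gz',
              'case01/probability.nii.gz')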
Example #9
def main():
	# prepare settings
	left_out_case = sys.argv[4]
	src_dir = sys.argv[1]
	seg_dir = sys.argv[2]
	msk_str = sys.argv[3]
	trg_dir = '{}/{}/'.format(src_dir, left_out_case)

	# collect cases present
	cases = []
	for _file in os.listdir(src_dir):
		if True == os.path.isdir(os.path.join(src_dir, _file)):
			cases.append(_file)
	cases.sort()

	if verboose: print 'Preparing leave-{}-out training set'.format(left_out_case)
	# determine the cases to use for the training set built in this round
	training_set_cases = list(cases)
	training_set_cases.remove(left_out_case)
	# initialize collection variables
	training_set_foreground_selections = dict.fromkeys(training_set_cases)
	training_set_background_selections = dict.fromkeys(training_set_cases)
	
	# use stratified random sampling to select a number of samples for each case
	for case in training_set_cases:
		if verboose: print 'Stratified random sampling of case {}'.format(case)
		# determine number of samples to draw from this case
		samples_to_draw = int(total_no_of_samples / len(training_set_cases))
		if debug: print 'samples_to_draw', samples_to_draw
		# load class memberships of case as binary array
		mask = load(msk_str.format(case))[0].astype(numpy.bool)
		truth = load(os.path.join(seg_dir, '{}.nii.gz'.format(case)))[0].astype(numpy.bool)
		class_vector = truth[mask]
		# determine how many fg and bg samples to draw from this case
		ratio = numpy.count_nonzero(~class_vector) / float(numpy.count_nonzero(class_vector))
		fg_samples_to_draw = int(samples_to_draw / (ratio + 1))
		bg_samples_to_draw = int(samples_to_draw / (ratio + 1) * ratio)
		if debug: print 'fg_samples_to_draw', fg_samples_to_draw
		if debug: print 'bg_samples_to_draw', bg_samples_to_draw
		if debug: print 'ratio fg:bg', '1:{}'.format(ratio)
		# check for exceptions
		if fg_samples_to_draw < min_no_of_samples_per_class_and_case: raise Exception('Current setting would lead to a drawing of only {} fg samples for case {}!'.format(fg_samples_to_draw, case))
		if bg_samples_to_draw < min_no_of_samples_per_class_and_case: raise Exception('Current setting would lead to a drawing of only {} bg samples for case {}!'.format(bg_samples_to_draw, case))
		if fg_samples_to_draw > numpy.count_nonzero(class_vector):
			raise Exception('Current settings would require to draw {} fg samples, but only {} present for case {}!'.format(fg_samples_to_draw, numpy.count_nonzero(class_vector), case))
		if bg_samples_to_draw > numpy.count_nonzero(~class_vector):
			raise Exception('Current settings would require to draw {} bg samples, but only {} present for case {}!'.format(bg_samples_to_draw, numpy.count_nonzero(~class_vector), case))
		# get sample indices and split into fg and bg indices
		samples_indices = numpy.arange(len(class_vector))
		fg_samples_indices = samples_indices[class_vector]
		bg_samples_indices = samples_indices[~class_vector]
		if debug: print 'fg_samples_indices.shape', fg_samples_indices.shape
		if debug: print 'bg_samples_indices.shape', bg_samples_indices.shape
		# randomly draw the required number of sample indices
		numpy.random.shuffle(fg_samples_indices)
		numpy.random.shuffle(bg_samples_indices)
		fg_sample_selection = fg_samples_indices[:fg_samples_to_draw]
		bg_sample_selection = bg_samples_indices[:bg_samples_to_draw]
		if debug: print 'fg_sample_selection.shape', fg_sample_selection.shape
		if debug: print 'bg_sample_selection.shape', bg_sample_selection.shape
		# add to collection
		training_set_foreground_selections[case] = fg_sample_selection
		training_set_background_selections[case] = bg_sample_selection
		
	# load the features of each case, draw the samples from them and append them to a training set
	fg_samples = []
	bg_samples = []
	for case in training_set_cases:
		if verboose: print 'Sampling features of case {}'.format(case)
		
		# loading and sampling features piece-wise to avoid excessive memory requirements
		fg_samples_case = []
		bg_samples_case = []
		feature_list = []
		for _file in os.listdir(os.path.join(src_dir, case)):
			if _file.endswith('.npy') and _file.startswith('feature.'):
				feature_list.append(_file[:-4])
				with open(os.path.join(src_dir, case, _file), 'r') as f:
					feature_vector = numpy.load(f)
					fg_samples_case.append(feature_vector[training_set_foreground_selections[case]])
					bg_samples_case.append(feature_vector[training_set_background_selections[case]])
				
		# join and append feature vector from this case
		fg_samples.append(join(*fg_samples_case))
		bg_samples.append(join(*bg_samples_case))
		
	# prepare training set as numpy array and the class memberships
	fg_samples = append(*fg_samples)
	bg_samples = append(*bg_samples)
	samples_class_memberships = numpy.zeros(len(fg_samples) + len(bg_samples), dtype=numpy.bool)
	samples_class_memberships[:len(fg_samples)] += numpy.ones(len(fg_samples), dtype=numpy.bool)
	samples_feature_vector = append(fg_samples, bg_samples)
	
	if debug: print 'samples_feature_vector', samples_feature_vector.shape
	if debug: print 'class_memberships', samples_class_memberships.shape
	
	# save feature vector, feature names and class membership vector as leave-one-out training set
	if verboose: print 'Saving training data set'
	with open('{}/trainingset.features.npy'.format(trg_dir), 'wb') as f:
		numpy.save(f, samples_feature_vector)
	with open('{}/trainingset.classes.npy'.format(trg_dir), 'wb') as f:
		numpy.save(f, samples_class_memberships)
	with open('{}/trainingset.fnames.npy'.format(trg_dir), 'wb') as f:
		numpy.save(f, feature_list)
		
	if verboose: print
			
	if verboose: print 'Done.'
Example #10
File: features.py Project: loli/neuroless
def stratifiedrandomsampling(featureclassquadrupel, trainingsetfile, classsetfile, nsamples = 500000, min_no_of_samples_per_class_and_case = 20):
    r"""
    Extract a training sample set from the supplied feature sets using stratified random sampling.
    
    Parameters
    ----------
    featureclassquadrupel : list of tuples
        Quadruples containing (a) a list of a case's feature files, (b) the corresponding
        class membership file, (c) the brain mask file and (d) the sample point file.
    trainingsetfile : string
        The target training set file.
    classsetfile : string
        The target class membership file.
    nsamples : int or False, optional
        The amount of samples to draw. If False, all are drawn.
    min_no_of_samples_per_class_and_case : int
        An exception is raised when fewer than this number of samples would be drawn for a class of a case.
    
    Raises
    ------
    InvalidConfigurationError
        When the current configuration would require drawing more samples than are present in a case, or none at all.
    InvalidConfigurationError
        When fewer samples than defined by ``min_no_of_samples_per_class_and_case`` would be drawn for a class from a single case.
    """
    logger = Logger.getInstance()
    
    # determine amount of samples to draw from each case
    ncases = len(featureclassquadrupel)
    nsamplescase = int(nsamples / ncases)
    logger.debug('drawing {} samples from {} cases each (total {} samples)'.format(nsamplescase, ncases, nsamples))
    
    # initialize collectors
    fg_samples = []
    bg_samples = []
    
    for cid, (featurefiles, classfile, brainmaskfile, featurepointfile) in enumerate(featureclassquadrupel):
        
        # adapt samples to draw from last case to draw a total of nsamples
        if len(featureclassquadrupel) - 1 == cid:
            nsamplescase += nsamples % ncases
        
        # load the class memberships
        classes = numpy.load(classfile, mmap_mode='r') 
        
        # determine number of fg and bg samples to draw for this case
        nbgsamples = int(float(numpy.count_nonzero(~classes)) / classes.size * nsamplescase)
        nfgsamples = int(float(numpy.count_nonzero(classes)) / classes.size * nsamplescase)
        nfgsamples += nsamplescase - (nfgsamples + nbgsamples) # +/- a little
        logger.debug('iteration {}: drawing {} fg and {} bg samples'.format(cid, nfgsamples, nbgsamples))
        
        # check for exceptions
        if nfgsamples < min_no_of_samples_per_class_and_case: raise InvalidConfigurationError('Current setting would lead to a drawing of only {} fg samples for case {}!'.format(nfgsamples, classfile))
        if nbgsamples < min_no_of_samples_per_class_and_case: raise InvalidConfigurationError('Current setting would lead to a drawing of only {} bg samples for case {}!'.format(nbgsamples, classfile))
        if nfgsamples > numpy.count_nonzero(classes):
            raise InvalidConfigurationError('Current settings would require to draw {} fg samples, but only {} present for case {}!'.format(nfgsamples, numpy.count_nonzero(classes), classfile))
        if nbgsamples > numpy.count_nonzero(~classes):
            raise InvalidConfigurationError('Current settings would require to draw {} bg samples, but only {} present for case {}!'.format(nbgsamples, numpy.count_nonzero(~classes), classfile))
        
        # get sample indices and split into fg and bg indices
        samples_indices = numpy.arange(len(classes))
        fg_samples_indices = samples_indices[classes]
        bg_samples_indices = samples_indices[~classes]
        
        # randomly draw the required number of sample indices
        numpy.random.shuffle(fg_samples_indices)
        numpy.random.shuffle(bg_samples_indices)
        fg_sample_selection = fg_samples_indices[:nfgsamples]
        bg_sample_selection = bg_samples_indices[:nbgsamples]
        
        # memory-efficient loading of the features for this case
        features = join(*[numpy.load(featurefile, mmap_mode='r') for featurefile in featurefiles])
        
        # draw and add to collection
        fg_samples.append(features[fg_sample_selection])
        bg_samples.append(features[bg_sample_selection])
        
        # create and save sample point file
        mask, maskh = load(brainmaskfile)
        mask = mask.astype(numpy.bool)
        featurepointimage = numpy.zeros_like(mask, numpy.uint8)
        featurepointimage = _setimagepointstwofilter(featurepointimage, mask, fg_sample_selection, SAMPLEPOINT_FG_VALUE)
        featurepointimage = _setimagepointstwofilter(featurepointimage, mask, bg_sample_selection, SAMPLEPOINT_BG_VALUE)
        save(featurepointimage, featurepointfile, maskh)

    # join and append feature vectors of all cases
    fg_samples = append(*fg_samples)
    bg_samples = append(*bg_samples)
    
    # build class membership    
    samples_class_memberships = numpy.zeros(len(fg_samples) + len(bg_samples), dtype=numpy.bool)
    samples_class_memberships[:len(fg_samples)] += numpy.ones(len(fg_samples), dtype=numpy.bool)
    
    # join fg and bg feature vectors
    samples_feature_vector = append(fg_samples, bg_samples)
    
    # save all
    with open(trainingsetfile, 'wb') as f:
        numpy.save(f, samples_feature_vector)
    with open(classsetfile, 'wb') as f:
        numpy.save(f, samples_class_memberships)
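The per-case split preserves the class ratio of the case and pushes any rounding remainder onto the foreground count (the '# +/- a little' line). A worked example of the same arithmetic, with made-up numbers:

import numpy

classes = numpy.zeros(10000, dtype=bool)
classes[:333] = True                  # ~3.3 % foreground voxels, as an example
nsamplescase = 1000

nbgsamples = int(float(numpy.count_nonzero(~classes)) / classes.size * nsamplescase)  # 966
nfgsamples = int(float(numpy.count_nonzero(classes)) / classes.size * nsamplescase)   # 33
nfgsamples += nsamplescase - (nfgsamples + nbgsamples)                                # 34, absorbs the remainder
assert nfgsamples + nbgsamples == nsamplescase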
Example #11
File: features.py Project: loli/neuroless
def _extract(imagefiles, destfiles, brainmaskfile, fndestfile, groundtruthfile = False, cmdestfile = False):
    r"""
    Extract all features from the supplied images.
    
    Parameters
    ----------
    imagefiles : sequence of strings
        The images from which to extract the features.
    destfiles : sequence of strings
        The files in which to save the extracted features, one per image.
    brainmaskfile : string
        The corresponding brain mask.
    fndestfile : string
        The destination file for the feature names.        
    groundtruthfile : string
        The corresponding ground-truth file.
    cmdestfile : string
        The destination file for the class memberships.
    """
    # loading the support images
    msk = load(brainmaskfile)[0].astype(numpy.bool)
    if groundtruthfile: gt = load(groundtruthfile)[0].astype(numpy.bool)
    
    # for each pair of image and destination files
    for imagefile, destfile in zip(imagefiles, destfiles):
        
        # prepare feature vector and the feature identification list
        feature_vector = None
        feature_names = []        
        
        # load the image
        img, hdr = load(imagefile)
        
        # iterate the features to extract
        for function_call, function_arguments, voxelspacing in FEATURE_CONFIG:
            
            # extract the feature
            call_arguments = list(function_arguments)
            if voxelspacing: call_arguments.append(header.get_pixel_spacing(hdr))
            call_arguments.append(msk)
            fv = function_call(img, *call_arguments)
            
            # append to the image's feature vector
            if feature_vector is None:
                feature_vector = fv
            else:
                feature_vector = join(feature_vector, fv)
                
            # create and save feature names
            feature_name = '{}.{}'.format(function_call.__name__, '_'.join(map(str, function_arguments)))
            if fv.ndim > 1:
                feature_names.extend(['{}.{}'.format(feature_name, i) for i in range(fv.shape[0])])
            else:
                feature_names.append(feature_name)
        
        # save the extracted feature vector and the feature names
        with open(destfile, 'wb') as f:
            numpy.save(f, feature_vector.astype(FEATURE_DTYPE))
    
    # save the feature names (only once, at the end)
    with open(fndestfile, 'wb') as f:
        pickle.dump(feature_names, f)
        
    # save the class memberships (truncated by the brain mask)
    if groundtruthfile:
        with open(cmdestfile, 'wb') as f:
            pickle.dump(gt[msk], f)
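FEATURE_CONFIG itself is not part of the excerpt; from the way the loop consumes it, each entry is a (feature function, extra positional arguments, needs-voxel-spacing flag) triple, to which the loop appends the voxel spacing (if flagged) and then the brain mask. A purely hypothetical configuration built from the feature functions of the earlier examples might look like:

# hypothetical entries: (function, additional positional arguments, pass voxel spacing?)
FEATURE_CONFIG = [
    (intensities, [], False),
    (indices, [], True),
]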
Example #12
def main():
	# catch arguments
	src_dir = sys.argv[1]
	seg_dir = sys.argv[2]
	msk_dir = sys.argv[3]
	trg_dir = sys.argv[4]
	feature_cnf_file = sys.argv[5]
	total_no_of_samples = int(sys.argv[6])
	training_set_cases = sys.argv[7:]

	# load features to use and create proper names from them
	features_to_use = load_feature_names(feature_cnf_file)

	# warn if target sample set already exists
	if os.path.isfile('{}/trainingset.features.npy'.format(trg_dir)):
		if override:
			print 'WARNING: The target file {}/trainingset.features.npy already exists and will be replaced by a new sample.'.format(trg_dir)
		else:
			print 'WARNING: The target file {}/trainingset.features.npy already exists. Skipping.'.format(trg_dir)
			sys.exit(0)

	if verboose: print 'Preparing leave-out training set'
	# initialize collection variables
	training_set_foreground_selections = dict.fromkeys(training_set_cases)
	training_set_background_selections = dict.fromkeys(training_set_cases)
	
	# use stratified random sampling to select a number of samples for each case
	for case in training_set_cases:
		if verboose: print 'Stratified random sampling of case {}'.format(case)
		# determine number of samples to draw from this case
		samples_to_draw = int(total_no_of_samples / len(training_set_cases))
		if debug: print 'samples_to_draw', samples_to_draw
		# load class memberships of case as binary array
		mask = load(os.path.join(msk_dir, '{}.nii.gz'.format(case)))[0].astype(numpy.bool)
		truth = load(os.path.join(seg_dir, '{}.nii.gz'.format(case)))[0].astype(numpy.bool)
		class_vector = truth[mask]
		# determine how many fg and bg samples to draw from this case
		ratio = numpy.count_nonzero(~class_vector) / float(numpy.count_nonzero(class_vector))
		fg_samples_to_draw = int(samples_to_draw / (ratio + 1))
		bg_samples_to_draw = int(samples_to_draw / (ratio + 1) * ratio)
		if debug: print 'fg_samples_to_draw', fg_samples_to_draw
		if debug: print 'bg_samples_to_draw', bg_samples_to_draw
		if debug: print 'ratio fg:bg', '1:{}'.format(ratio)
		# check for exceptions
		if fg_samples_to_draw < min_no_of_samples_per_class_and_case: raise Exception('Current setting would lead to a drawing of only {} fg samples for case {}!'.format(fg_samples_to_draw, case))
		if bg_samples_to_draw < min_no_of_samples_per_class_and_case: raise Exception('Current setting would lead to a drawing of only {} bg samples for case {}!'.format(bg_samples_to_draw, case))
		if fg_samples_to_draw > numpy.count_nonzero(class_vector):
			raise Exception('Current settings would require to draw {} fg samples, but only {} present for case {}!'.format(fg_samples_to_draw, numpy.count_nonzero(class_vector), case))
		if bg_samples_to_draw > numpy.count_nonzero(~class_vector):
			raise Exception('Current settings would require to draw {} bg samples, but only {} present for case {}!'.format(bg_samples_to_draw, numpy.count_nonzero(~class_vector), case))
		# get sample indices and split into fg and bg indices
		samples_indices = numpy.arange(len(class_vector))
		fg_samples_indices = samples_indices[class_vector]
		bg_samples_indices = samples_indices[~class_vector]
		if debug: print 'fg_samples_indices.shape', fg_samples_indices.shape
		if debug: print 'bg_samples_indices.shape', bg_samples_indices.shape
		# randomly draw the required number of sample indices
		numpy.random.shuffle(fg_samples_indices)
		numpy.random.shuffle(bg_samples_indices)
		fg_sample_selection = fg_samples_indices[:fg_samples_to_draw]
		bg_sample_selection = bg_samples_indices[:bg_samples_to_draw]
		if debug: print 'fg_sample_selection.shape', fg_sample_selection.shape
		if debug: print 'bg_sample_selection.shape', bg_sample_selection.shape
		# add to collection
		training_set_foreground_selections[case] = fg_sample_selection
		training_set_background_selections[case] = bg_sample_selection
		
	# load the features of each case, draw the samples from them and append them to a training set
	fg_samples = []
	bg_samples = []
	for case in training_set_cases:
		if verboose: print 'Sampling features of case {}'.format(case)
		
		# loading and sampling features piece-wise to avoid excessive memory requirements
		fg_samples_case = []
		bg_samples_case = []
		for feature_name in features_to_use:
			_file = os.path.join(src_dir, case, '{}.npy'.format(feature_name))
			if not os.path.isfile(_file):
				raise Exception('The feature "{}" for case {} could not be found in folder "{}". Breaking.'.format(feature_name, case, os.path.join(src_dir, case)))
			with open(_file, 'r') as f:
				feature_vector = numpy.load(f)
				fg_samples_case.append(feature_vector[training_set_foreground_selections[case]])
				bg_samples_case.append(feature_vector[training_set_background_selections[case]])
				
		# join and append feature vector from this case
		fg_samples.append(join(*fg_samples_case))
		bg_samples.append(join(*bg_samples_case))
		
	# prepare training set as numpy array and the class memberships
	fg_samples = append(*fg_samples)
	bg_samples = append(*bg_samples)
	samples_class_memberships = numpy.zeros(len(fg_samples) + len(bg_samples), dtype=numpy.bool)
	samples_class_memberships[:len(fg_samples)] += numpy.ones(len(fg_samples), dtype=numpy.bool)
	samples_feature_vector = append(fg_samples, bg_samples)
	
	if debug: print 'samples_feature_vector', samples_feature_vector.shape
	if debug: print 'class_memberships', samples_class_memberships.shape
	
	# save feature vector, feature names and class membership vector as leave-one-out training set
	if verboose: print 'Saving training data set'
	with open('{}/trainingset.features.npy'.format(trg_dir), 'wb') as f:
		numpy.save(f, samples_feature_vector)
	with open('{}/trainingset.classes.npy'.format(trg_dir), 'wb') as f:
		numpy.save(f, samples_class_memberships)
	with open('{}/trainingset.fnames.npy'.format(trg_dir), 'wb') as f:
		numpy.save(f, features_to_use)
	with open('{}/trainingset.fgselections.pkl'.format(trg_dir), 'wb') as f:
		pickle.dump(training_set_foreground_selections, f)
	with open('{}/trainingset.bgselections.pkl'.format(trg_dir), 'wb') as f:
		pickle.dump(training_set_background_selections, f)
		
	if verboose: print
			
	if verboose: print 'Done.'
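As in the other scripts, the arguments are read straight from sys.argv, so an invocation would look roughly like this (directory names, the feature configuration file and case identifiers are placeholders):

python sample_trainingset.py features/ segmentations/ masks/ target/ features.cnf 250000 case01 case02 case03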