Ejemplo n.º 1
0
    def _test(self, ichunk, cchunk, chunk_list_index, chunk_range_index):
        """Create or load the testing dataset for one chunk and agglomerate it with the classifier.

        The feature data is either loaded from ``self.testin`` (only when exactly one test chunk is
        configured) or created from a FRAG object built for the chunk. The scaled features are then
        used to merge supervoxels: with ``self.get_merge_predict_thr`` in iterative mode, otherwise
        with the classifier probabilities at every threshold in ``self.thresholds`` (falling back to
        a single hard prediction if that fails).

        Args:
            ichunk: index of this chunk in ``self.iterative_frag``.
            cchunk: chunk coordinates; ``cchunk.tolist()`` must yield three integers.
            chunk_list_index: row of ``self.offset_list`` / ``self.size_list`` for this chunk.
            chunk_range_index: not used by this method.

        Returns:
            Tuple ``(data, sdata, thr)`` where ``data`` is the dataset dict, ``sdata`` is
            ``scale(data['data'])`` and ``thr`` is the threshold returned by
            ``self.get_merge_predict_thr`` in iterative mode, or -1 otherwise.
        """
        print('Exporting testing data for chunk %d,%d,%d' % tuple(cchunk.tolist()))
        offset = self.offset_list[chunk_list_index,:]
        size = self.size_list[chunk_list_index,:]

        # previously dumped testing data is only reused when there is a single test chunk
        use_testin = len(self.test_chunks) == 1 and self.testin

        FRAG = None
        if use_testin:
            if self.dpSupervoxelClassifier_verbose:
                print('Loading testing data')
            # NOTE: unpickling can execute arbitrary code, only load files from a trusted source
            with open(self.testin, 'rb') as f:
                data = dill.load(f)
            FRAG = data['FRAG']
            data = data['data']

        frag = None
        subgroups_out = list(self.label_subgroups_out)
        if self.iterative_mode:
            if self.iterative_frag[ichunk] is None:
                # last output subgroup is a placeholder that is replaced by the threshold below
                subgroups_out += ['thr']
            else:
                frag = self.iterative_frag[ichunk]

        if frag is None:
            if self.doplots:
                frag = dpFRAG.makeBothFRAG(self.labelfile, cchunk, size, offset,
                    [self.probfile, self.probaugfile], [self.rawfile, self.rawaugfile],
                    self.raw_dataset, self.gtfile, self.outfile, self.label_subgroups, subgroups_out,
                    G=FRAG, progressBar=self.progress_bar, feature_set=self.feature_set, has_ECS=self.has_ECS,
                    verbose=self.dpSupervoxelClassifier_verbose)
            else:
                frag = dpFRAG.makeTestingFRAG(self.labelfile, cchunk, size, offset,
                    [self.probfile, self.probaugfile], [self.rawfile, self.rawaugfile],
                    self.raw_dataset, self.outfile, self.label_subgroups, subgroups_out, G=FRAG,
                    progressBar=self.progress_bar, feature_set=self.feature_set, has_ECS=self.has_ECS,
                    verbose=self.dpSupervoxelClassifier_verbose)

        # remember a newly created frag so that later iterations reuse it
        if self.iterative_mode and self.iterative_frag[ichunk] is None:
            frag.isTraining = False
            self.iterative_frag[ichunk] = frag

        if not use_testin:
            frag.createFRAG(update=self.iterative_mode)
            data = frag.createDataset(train=self.doplots)

            if self.testout:
                if self.dpSupervoxelClassifier_verbose:
                    print('Dumping testing data')
                # append the ini section instead of overwriting the command line section
                descr = 'Testing data from dpFRAG.py with command line:\n' + self.arg_str
                descr += '\n' + ('With ini file "%s":\n' % (self.cfgfile,)) + self.ini_str
                data['DESCR'] = descr
                with open(self.testout, 'wb') as f:
                    dill.dump({'data':data,'FRAG':frag.FRAG}, f)

        sdata = scale(data['data'])     # normalize for the classifiers

        thr = -1
        if self.iterative_mode:
            # merge based on current classifier
            frag.subgroups_out[-1] = '%.8f' % self.threshold_subgroups[self.iterative_mode_count]
            clf_predict, thr = self.get_merge_predict_thr(sdata)
            frag.agglomerate(clf_predict)

            # make the next training iteration load from the current agglomerated supervoxels
            self.iterative_frag[ichunk].srcfile = self.iterative_frag[ichunk].outfile
            self.iterative_frag[ichunk].subgroups = self.iterative_frag[ichunk].subgroups_out
        else:
            try:
                # predict merge or not on testing cube and write outputs at specified probability thresholds
                frag.threshold_agglomerate(self.clf.predict_proba(sdata), self.thresholds, self.threshold_subgroups)
                # there's an issue here if there are no mergers left, would be better to just copy the current
                #   agglomeration, xxx - deal with this later. handled this explicitly in other locations.
            except Exception:
                # best-effort fallback (e.g. the classifier doesn't do probabilities): export a single
                #   prediction. Exception (not bare except) so KeyboardInterrupt / SystemExit still propagate.
                frag.subgroups_out += ['single_' + self.classifier]
                frag.agglomerate(self.clf.predict(sdata))

        return data,sdata,thr
Ejemplo n.º 2
0
# options passed on to the FRAG constructor
verbose = True
progressBar = True
feature_set = 'minimal'

# with getFeatures=False only the RAG is requested (without boundary voxels or features)
getFeatures = False

# build the frag object for both training and testing, this also loads the data
frag = dpFRAG.makeBothFRAG(
    labelfile, chunk, size, offset,
    [probfile, probaugfile],
    [rawfile, rawaugfile],
    raw_dataset, gtfile, outfile,
    label_subgroups, ['training', 'thr'],
    progressBar=progressBar, feature_set=feature_set,
    has_ECS=has_ECS, verbose=verbose,
)

# hack: also save the raveled indices of the overlap in the context of the whole volume
# (including the boundary); the boundary size is saved in frag.eperim
frag.ovlp_attrs += ['ovlp_cur_dilate']

# square supervoxel-by-supervoxel matrix for the graph, initially all zeros
generated_adjacency = np.zeros(shape=(frag.nsupervox, frag.nsupervox), dtype=np.int32)
Ejemplo n.º 3
0
    def train(self):
        """Gather training data, fit (or load) the classifier and agglomerate in iterative mode.

        Training data is loaded from ``self.trainin`` if it is set (and a classifier still has to
        be trained or plots were requested); otherwise it is accumulated from every chunk that is
        not listed in ``self.test_chunks``. The classifier is loaded from ``self.classifierin`` if
        set, else a new one selected by ``self.classifier`` is fit to the scaled features. In
        iterative mode every training chunk is then agglomerated with the current classifier so
        that the next iteration starts from the merged supervoxels.

        Note: when ``self.classifierin`` and ``self.doplots`` are both set, ``self.trainin`` must
        be set as well, because no feature data is gathered otherwise.

        Returns:
            The value of ``self.createPlots(...)`` when ``self.doplots`` is set, otherwise ``None``.

        Raises:
            ValueError: if loaded training data has an unexpected number of features or if
                ``self.classifier`` does not name a supported classifier.
        """
        if self.dpSupervoxelClassifier_verbose: print('\nTRAIN')

        if self.trainin and (not self.classifierin or self.doplots):
            if self.dpSupervoxelClassifier_verbose:
                print('Loading training data')
            # NOTE: unpickling can execute arbitrary code, only load files from a trusted source
            with open(self.trainin, 'rb') as f:
                data = dill.load(f)
            target = data['target']; fdata = data['data']
            ntargets = target.size; nfeatures = fdata.shape[1]
            # explicit check instead of assert so that it is not stripped with python -O
            if nfeatures != self.nfeatures:
                raise ValueError('Loaded training data has %d features, expected %d'
                    % (nfeatures, self.nfeatures))

        elif not self.classifierin:
            #dict_keys(['feature_names', 'DESCR', 'target_names', 'target', 'data'])
            nalloc = self.nchunks*self.nalloc_per_chunk
            nfeatures = self.nfeatures
            target = np.zeros((nalloc,), dtype=np.int64)
            fdata = np.zeros((nalloc,nfeatures), dtype=np.double)

            # accumulate training data from all training chunks
            cnt_targets = 0; ntargets = np.zeros((self.nchunks,),dtype=np.int64)
            for chunk in range(self.nchunks):
                cchunk, chunk_list_index, chunk_range_index = self.get_chunk_inds(chunk)
                offset = self.offset_list[chunk_list_index,:]; size = self.size_list[chunk_list_index,:]

                if chunk_list_index in self.test_chunks: continue
                print('Appending training data for chunk %d,%d,%d' % tuple(cchunk.tolist()))

                if self.iterative_mode:
                    if self.iterative_frag[chunk] is None:
                        frag = dpFRAG.makeBothFRAG(self.labelfile, cchunk, size, offset,
                            [self.probfile, self.probaugfile], [self.rawfile, self.rawaugfile],
                            self.raw_dataset, self.gtfile, self.outfile, self.label_subgroups, ['training','thr'],
                            progressBar=self.progress_bar, feature_set=self.feature_set, has_ECS=self.has_ECS,
                            verbose=self.dpSupervoxelClassifier_verbose)
                        frag.isTraining = True; self.iterative_frag[chunk] = frag
                    else:
                        frag = self.iterative_frag[chunk]
                else:
                    frag = dpFRAG.makeTrainingFRAG(self.labelfile, cchunk, size, offset,
                        [self.probfile, self.probaugfile], [self.rawfile, self.rawaugfile],
                        self.raw_dataset, self.gtfile, self.label_subgroups, feature_set=self.feature_set,
                        has_ECS=self.has_ECS,
                        progressBar=self.progress_bar, verbose=self.dpSupervoxelClassifier_verbose)
                frag.createFRAG(update=self.iterative_mode)
                data = frag.createDataset()
                ntargets[chunk] = data['target'].shape[0]
                target[cnt_targets:cnt_targets+ntargets[chunk]] = data['target']
                fdata[cnt_targets:cnt_targets+ntargets[chunk],:] = data['data']
                cnt_targets += ntargets[chunk]
            # trim the preallocated arrays down to what was actually filled
            target = target[:cnt_targets]; fdata = fdata[:cnt_targets,:]

            if self.trainout:
                #dict_keys(['feature_names', 'DESCR', 'target_names', 'target', 'data'])
                # append the ini section instead of overwriting the command line section
                descr = 'Training data from dpFRAG.py with command line:\n' + self.arg_str
                descr += '\n' + ('With ini file "%s":\n' % (self.cfgfile,)) + self.ini_str
                # the dataset dict of the last chunk is reused as the container for the accumulated data
                data['data'] = fdata; data['target'] = target; data['DESCR'] = descr
                with open(self.trainout, 'wb') as f:
                    dill.dump(data, f)

        if not self.classifierin or self.doplots:
            # everyone wants to be norml
            sdata = scale(fdata)   # normalize for the classifiers

        if self.classifierin:
            if self.dpSupervoxelClassifier_verbose:
                print('\nLoading classifier:'); t = time.time()

            with open(self.classifierin, 'rb') as f:
                d = dill.load(f)
            self.clf = d['classifier']
        else:
            if self.dpSupervoxelClassifier_verbose:
                # target.size is defined for loaded and for accumulated training data alike
                print('\nTraining classifier %s with %d examples and %d features:' % (self.classifier,
                    target.size, nfeatures)); t = time.time()

            # train a classifier
            if self.classifier == 'lda':
                self.clf = LinearDiscriminantAnalysis(solver='svd', store_covariance=False, priors=self.priors)
                #self.clf = LinearDiscriminantAnalysis(solver='eigen', store_covariance=True, priors=self.priors)
            elif self.classifier == 'qda':
                self.clf = QuadraticDiscriminantAnalysis(priors=self.priors)
            elif self.classifier == 'rf':
                # the gala parameters
                #self.clf = RandomForestClassifier(n_estimators=100, criterion='entropy', max_depth=20,
                #    bootstrap=False, random_state=None)
                #self.clf = RandomForestClassifier(n_estimators=5*nfeatures,n_jobs=self.n_jobs,max_depth=10)
                self.clf = RandomForestClassifier(n_estimators=256,n_jobs=self.n_jobs,max_depth=16)
            elif self.classifier == 'svm':
                self.clf = SVC(kernel='rbf',probability=True,cache_size=2000)
            elif self.classifier == 'nb':
                self.clf = GaussianNB()
            elif self.classifier == 'kn':
                self.clf = KNeighborsClassifier(n_neighbors=10,n_jobs=self.n_jobs)
            elif self.classifier == 'dc':
                self.clf = DecisionTreeClassifier(max_depth=10)
            elif self.classifier == 'ada':
                self.clf = AdaBoostClassifier()
            elif self.classifier == 'lr':
                self.clf = LogisticRegression(penalty='l2',dual=False,solver='sag',n_jobs=self.n_jobs)
            else:
                # raise instead of assert(False) so an unknown name can not slip through with python -O
                raise ValueError('Unknown classifier "%s"' % (self.classifier,))

            # train to the normalized data and merge or no merge targets
            self.clf.fit(sdata, target)

            if self.classifierout:
                with open(self.classifierout, 'wb') as f:
                    dill.dump({'classifier':self.clf}, f)

        if self.dpSupervoxelClassifier_verbose:
            print('\tdone in %.4f s' % (time.time() - t))

        # do the agglomeration to use as input to next iteration for iterative mode
        thr = -1
        if self.iterative_mode:
            cnt_targets = 0
            for chunk in range(self.nchunks):
                if self.iterative_frag[chunk] is None or not self.iterative_frag[chunk].isTraining: continue

                # get the feature data for current training chunk only
                cdata = sdata[cnt_targets:cnt_targets+ntargets[chunk],:]
                cnt_targets += ntargets[chunk]

                # merge based on current classifier
                self.iterative_frag[chunk].subgroups_out[-1] = '%.8f' \
                    % self.threshold_subgroups[self.iterative_mode_count]
                clf_predict, thr = self.get_merge_predict_thr(cdata)
                self.iterative_frag[chunk].agglomerate(clf_predict)

                # make the next training iteration load from the current agglomerated supervoxels
                self.iterative_frag[chunk].srcfile = self.iterative_frag[chunk].outfile
                self.iterative_frag[chunk].subgroups = self.iterative_frag[chunk].subgroups_out

        if self.doplots:
            return self.createPlots(target,sdata,self.clf,self.export_plots,
                name=self.classifier + '_train_' + '_'.join([str(x) for x in self.test_chunks]) + \
                '_iter_' + str(self.iterative_mode_count), thr=thr, plot_features=self.plot_features)