def _test(self, ichunk, cchunk, chunk_list_index, chunk_range_index):
    """Create (or load) the testing data for one chunk and agglomerate it with the current classifier.

    Parameters:
        ichunk: index of this chunk in self.iterative_frag.
        cchunk: chunk coordinates; must provide .tolist() yielding three integers. Used for the log
            message and forwarded to the dpFRAG factory functions.
        chunk_list_index: row index into self.offset_list and self.size_list.
        chunk_range_index: not used by this method.

    Returns:
        tuple (data, sdata, thr):
            data  - the dataset dict (from frag.createDataset, or the 'data' entry loaded from self.testin).
            sdata - scale(data['data']), the normalized features handed to the classifier.
            thr   - second value returned by self.get_merge_predict_thr in iterative mode, -1 otherwise.
    """
    print('Exporting testing data for chunk %d,%d,%d' % tuple(cchunk.tolist()))
    offset = self.offset_list[chunk_list_index,:]; size = self.size_list[chunk_list_index,:]

    # previously exported testing data is only reused when there is exactly one test chunk
    load_testin = len(self.test_chunks) == 1 and self.testin

    FRAG = None
    if load_testin:
        if self.dpSupervoxelClassifier_verbose: print('Loading testing data')
        # NOTE: dill.load can execute arbitrary code while unpickling; only use trusted files for testin
        with open(self.testin, 'rb') as f:
            data = dill.load(f)
        FRAG = data['FRAG']; data = data['data']

    frag = None; subgroups_out = list(self.label_subgroups_out)
    if self.iterative_mode:
        if self.iterative_frag[ichunk] is None:
            # extra output subgroup slot; it is overwritten with the current threshold name below
            subgroups_out += ['thr']
        else:
            frag = self.iterative_frag[ichunk]

    if frag is None:
        if self.doplots:
            frag = dpFRAG.makeBothFRAG(self.labelfile, cchunk, size, offset,
                [self.probfile, self.probaugfile], [self.rawfile, self.rawaugfile],
                self.raw_dataset, self.gtfile, self.outfile, self.label_subgroups, subgroups_out,
                G=FRAG, progressBar=self.progress_bar, feature_set=self.feature_set,
                has_ECS=self.has_ECS, verbose=self.dpSupervoxelClassifier_verbose)
        else:
            frag = dpFRAG.makeTestingFRAG(self.labelfile, cchunk, size, offset,
                [self.probfile, self.probaugfile], [self.rawfile, self.rawaugfile],
                self.raw_dataset, self.outfile, self.label_subgroups, subgroups_out,
                G=FRAG, progressBar=self.progress_bar, feature_set=self.feature_set,
                has_ECS=self.has_ECS, verbose=self.dpSupervoxelClassifier_verbose)
        if self.iterative_mode and self.iterative_frag[ichunk] is None:
            # remember the frag for later iterations and mark it as a testing (not training) chunk
            frag.isTraining = False; self.iterative_frag[ichunk] = frag

    if not load_testin:
        frag.createFRAG(update=self.iterative_mode)
        data = frag.createDataset(train=self.doplots)

        if self.testout:
            if self.dpSupervoxelClassifier_verbose: print('Dumping testing data')
            # fix: the ini-file part used to overwrite the command-line part instead of extending it
            descr = 'Testing data from dpFRAG.py with command line:\n' + self.arg_str
            descr += '\n' + ('With ini file "%s":\n' % (self.cfgfile,)) + self.ini_str
            data['DESCR'] = descr
            with open(self.testout, 'wb') as f:
                dill.dump({'data':data,'FRAG':frag.FRAG}, f)

    sdata = scale(data['data'])     # normalize for the classifiers

    thr = -1
    if self.iterative_mode:
        # merge based on current classifier
        frag.subgroups_out[-1] = '%.8f' % self.threshold_subgroups[self.iterative_mode_count]
        clf_predict, thr = self.get_merge_predict_thr(sdata)
        frag.agglomerate(clf_predict)

        # make the next training iteration load from the current agglomerated supervoxels
        self.iterative_frag[ichunk].srcfile = self.iterative_frag[ichunk].outfile
        self.iterative_frag[ichunk].subgroups = self.iterative_frag[ichunk].subgroups_out
    else:
        try:
            # predict merge or not on testing cube and write outputs at specified probability thresholds
            frag.threshold_agglomerate(self.clf.predict_proba(sdata), self.thresholds,
                self.threshold_subgroups)
            # there's an issue here if there are no mergers left, would be better to just copy the current
            #   agglomeration, xxx - deal with this later. handled this explicitly in other locations.
        except Exception:
            # deliberately broad best-effort fallback (but no longer swallows KeyboardInterrupt/SystemExit):
            # if the classifier doesn't do probabilities just export single prediction
            frag.subgroups_out += ['single_' + self.classifier]
            frag.agglomerate(self.clf.predict(sdata))

    return data,sdata,thr
# options forwarded to the FRAG factory below
feature_set, progressBar, verbose = 'minimal', True, True

# use getFeatures=False to only get the RAG (without boundary voxels or features)
getFeatures = False

# instantiate frag and load data
frag = dpFRAG.makeBothFRAG(
    labelfile, chunk, size, offset,
    [probfile, probaugfile],
    [rawfile, rawaugfile],
    raw_dataset, gtfile, outfile, label_subgroups,
    ['training', 'thr'],
    progressBar=progressBar,
    feature_set=feature_set,
    has_ECS=has_ECS,
    verbose=verbose
)

# hack to save raveled indices of overlap in context of whole volume (including boundary);
# the boundary size is saved in frag.eperim
frag.ovlp_attrs += ['ovlp_cur_dilate']

# create graph: square int32 adjacency matrix with one row / column per supervoxel, initially all zero
generated_adjacency = np.zeros((frag.nsupervox,) * 2, dtype=np.int32)
def train(self):
    """Assemble the training data, fit (or load) the classifier, then optionally agglomerate and plot.

    Steps, in order:
      1. Obtain targets and features: loaded from self.trainin, or accumulated from every chunk whose
         chunk_list_index is not in self.test_chunks. Nothing is assembled when self.classifierin is set
         (unless self.trainin is given together with self.doplots).
      2. When accumulating, optionally dump the training data to self.trainout.
      3. Load the classifier from self.classifierin, or fit the one selected by self.classifier on the
         scaled features and optionally dump it to self.classifierout. The result is stored in self.clf.
      4. In iterative mode, agglomerate each training chunk with the current classifier and redirect its
         frag so that the next iteration loads from the agglomerated supervoxels.

    Returns:
        The result of self.createPlots(...) when self.doplots is set, otherwise None.

    Raises:
        AssertionError: if loaded training data has a feature count different from self.nfeatures, or if
            self.classifier is not one of the supported names.
    """
    verbose = self.dpSupervoxelClassifier_verbose
    if verbose: print('\nTRAIN')

    if self.trainin and (not self.classifierin or self.doplots):
        if verbose: print('Loading training data')
        # NOTE: dill.load can execute arbitrary code while unpickling; only use trusted files for trainin
        with open(self.trainin, 'rb') as f:
            data = dill.load(f)
        target = data['target']; fdata = data['data']
        ntargets = target.size; nfeatures = fdata.shape[1]
        # fix: cnt_targets is reported in the training message below and was unbound on this path
        cnt_targets = ntargets
        assert( nfeatures == self.nfeatures )
    elif not self.classifierin:
        #dict_keys(['feature_names', 'DESCR', 'target_names', 'target', 'data'])
        nalloc = self.nchunks*self.nalloc_per_chunk
        nfeatures = self.nfeatures
        target = np.zeros((nalloc,), dtype=np.int64)
        fdata = np.zeros((nalloc,nfeatures), dtype=np.double)

        # accumulate training data from all training chunks
        cnt_targets = 0; ntargets = np.zeros((self.nchunks,),dtype=np.int64)
        for chunk in range(self.nchunks):
            cchunk, chunk_list_index, chunk_range_index = self.get_chunk_inds(chunk)
            offset = self.offset_list[chunk_list_index,:]; size = self.size_list[chunk_list_index,:]
            # test chunks never contribute training examples
            if chunk_list_index in self.test_chunks: continue

            print('Appending training data for chunk %d,%d,%d' % tuple(cchunk.tolist()))

            if self.iterative_mode:
                if self.iterative_frag[chunk] is None:
                    frag = dpFRAG.makeBothFRAG(self.labelfile, cchunk, size, offset,
                        [self.probfile, self.probaugfile], [self.rawfile, self.rawaugfile],
                        self.raw_dataset, self.gtfile, self.outfile, self.label_subgroups,
                        ['training','thr'], progressBar=self.progress_bar,
                        feature_set=self.feature_set, has_ECS=self.has_ECS, verbose=verbose)
                    # remember the frag for later iterations and mark it as a training chunk
                    frag.isTraining = True; self.iterative_frag[chunk] = frag
                else:
                    frag = self.iterative_frag[chunk]
            else:
                frag = dpFRAG.makeTrainingFRAG(self.labelfile, cchunk, size, offset,
                    [self.probfile, self.probaugfile], [self.rawfile, self.rawaugfile],
                    self.raw_dataset, self.gtfile, self.label_subgroups,
                    feature_set=self.feature_set, has_ECS=self.has_ECS,
                    progressBar=self.progress_bar, verbose=verbose)

            frag.createFRAG(update=self.iterative_mode)
            data = frag.createDataset()
            ntargets[chunk] = data['target'].shape[0]
            target[cnt_targets:cnt_targets+ntargets[chunk]] = data['target']
            fdata[cnt_targets:cnt_targets+ntargets[chunk],:] = data['data']
            cnt_targets += ntargets[chunk]

        # drop the unused tail of the preallocated arrays
        target = target[:cnt_targets]; fdata = fdata[:cnt_targets,:]

        if self.trainout:
            #dict_keys(['feature_names', 'DESCR', 'target_names', 'target', 'data'])
            # fix: the ini-file part used to overwrite the command-line part instead of extending it
            descr = 'Training data from dpFRAG.py with command line:\n' + self.arg_str
            descr += '\n' + ('With ini file "%s":\n' % (self.cfgfile,)) + self.ini_str
            # the dataset dict of the last processed chunk is reused as container for the combined data
            data['data'] = fdata; data['target'] = target; data['DESCR'] = descr
            with open(self.trainout, 'wb') as f:
                dill.dump(data, f)

    if not self.classifierin or self.doplots:
        # NOTE(review): with classifierin and doplots set but no trainin, fdata is never assigned and this
        #   raises NameError -- confirm whether that combination is meant to be supported.
        sdata = scale(fdata)    # normalize for the classifiers

    if self.classifierin:
        if verbose: print('\nLoading classifier:'); t = time.time()
        # NOTE: dill.load can execute arbitrary code while unpickling; only use trusted classifier files
        with open(self.classifierin, 'rb') as f:
            d = dill.load(f)
        self.clf = d['classifier']
    else:
        if verbose:
            print('\nTraining classifier %s with %d examples and %d features:' % (self.classifier,
                cnt_targets, nfeatures)); t = time.time()

        # train a classifier
        if self.classifier == 'lda':
            self.clf = LinearDiscriminantAnalysis(solver='svd', store_covariance=False, priors=self.priors)
            #self.clf = LinearDiscriminantAnalysis(solver='eigen', store_covariance=True, priors=self.priors)
        elif self.classifier == 'qda':
            self.clf = QuadraticDiscriminantAnalysis(priors=self.priors)
        elif self.classifier == 'rf':
            # the gala parameters
            #self.clf = RandomForestClassifier(n_estimators=100, criterion='entropy', max_depth=20,
            #    bootstrap=False, random_state=None)
            #self.clf = RandomForestClassifier(n_estimators=5*nfeatures,n_jobs=self.n_jobs,max_depth=10)
            self.clf = RandomForestClassifier(n_estimators=256,n_jobs=self.n_jobs,max_depth=16)
        elif self.classifier == 'svm':
            self.clf = SVC(kernel='rbf',probability=True,cache_size=2000)
        elif self.classifier == 'nb':
            self.clf = GaussianNB()
        elif self.classifier == 'kn':
            self.clf = KNeighborsClassifier(n_neighbors=10,n_jobs=self.n_jobs)
        elif self.classifier == 'dc':
            self.clf = DecisionTreeClassifier(max_depth=10)
        elif self.classifier == 'ada':
            self.clf = AdaBoostClassifier()
        elif self.classifier == 'lr':
            self.clf = LogisticRegression(penalty='l2',dual=False,solver='sag',n_jobs=self.n_jobs)
        else:
            # raised explicitly (same exception type as the former assert) so that it survives python -O
            raise AssertionError('unknown classifier %r' % (self.classifier,))

        # train to the normalized data and merge or no merge targets
        self.clf.fit(sdata, target)

        if self.classifierout:
            with open(self.classifierout, 'wb') as f:
                dill.dump({'classifier':self.clf}, f)

    if verbose: print('\tdone in %.4f s' % (time.time() - t))

    # do the agglomeration to use as input to next iteration for iterative mode
    thr = -1
    if self.iterative_mode:
        # NOTE(review): this loop needs sdata and a per-chunk ntargets array, which only exist when the
        #   training data was accumulated from chunks above -- confirm trainin / classifierin are not
        #   expected to be combined with iterative mode.
        cnt_targets = 0
        for chunk in range(self.nchunks):
            ifrag = self.iterative_frag[chunk]
            if ifrag is None or not ifrag.isTraining: continue

            # get the feature data for current training chunk only
            cdata = sdata[cnt_targets:cnt_targets+ntargets[chunk],:]
            cnt_targets += ntargets[chunk]

            # merge based on current classifier
            ifrag.subgroups_out[-1] = '%.8f' % self.threshold_subgroups[self.iterative_mode_count]
            clf_predict, thr = self.get_merge_predict_thr(cdata)
            ifrag.agglomerate(clf_predict)

            # make the next training iteration load from the current agglomerated supervoxels
            ifrag.srcfile = ifrag.outfile
            ifrag.subgroups = ifrag.subgroups_out

    if self.doplots:
        plot_name = (self.classifier + '_train_' + '_'.join([str(x) for x in self.test_chunks])
            + '_iter_' + str(self.iterative_mode_count))
        return self.createPlots(target,sdata,self.clf,self.export_plots, name=plot_name, thr=thr,
            plot_features=self.plot_features)