def load(self):
    '''
    load the serialized input and output state of this experiment

    looks for <exp_dir>/<exp_name>_input.pkl and <exp_dir>/<exp_name>_output.pkl
    and reads each one with jlload if it exists; a missing file yields an
    empty dict for that state

    returns (input_state, output_state)
    '''
    input_statefile = pathjoin(self.exp_dir, self.exp_name+'_input.pkl')
    output_statefile = pathjoin(self.exp_dir, self.exp_name+'_output.pkl')

    input_state = {}
    if pathexists(input_statefile):
        print('Loading input state from', input_statefile)
        input_state = jlload(input_statefile)

    output_state = {}
    if pathexists(output_statefile):
        print('Loading output state from', output_statefile)
        output_state = jlload(output_statefile)

    # previously both states were loaded and then dropped on the floor;
    # return them in the same (input, output) order _collect_state uses
    return input_state, output_state
def summarize(self):
    '''
    print the mean and standard deviation of each score of each evaluated model

    reads <exp_dir>/<exp_name>_output.pkl and, for every model id listed
    under output_state['model_eval'], prints one line per entry of
    output_state[model_id]['scores']

    returns {} if the output state cannot be loaded, None otherwise
    '''
    output_statefile = pathjoin(self.exp_dir, self.exp_name+'_output.pkl')
    try:
        output_state = jlload(output_statefile)
    except Exception:
        # not a bare except: Ctrl-C / SystemExit must still propagate
        print('Error: unable to load output state files in exp_dir=%s'%self.exp_dir)
        return {}

    for model_id in output_state['model_eval']:
        print('model: %s'%model_id)
        scores = output_state[model_id]['scores']
        # items() rather than iteritems(): the latter only exists on Python 2
        for score_id, score_vals in scores.items():
            print('mean %s: %7.4f (std=%7.4f)'%(score_id, mean(score_vals),
                                                std(score_vals)))
def model_coef(self):
    '''
    collect the per-fold coefficients of every evaluated model

    reads <exp_dir>/<exp_name>_<scaling_method>_<pred_mode>_output.pkl and,
    for each model id in output_state['model_eval'], applies the last entry
    of self.model_params[model_id] (the coefficient extractor) to every
    model stored in output_state[model_id]['models']

    for model id 'xgb' the extractor is expected to return a sparse
    {feature_index: weight} dict, which is expanded into a dense row
    (assumes the keys are integer column indices, since they are used to
    index a numpy array -- TODO confirm against the xgb extractor)

    returns {model_id: {'coef': array, 'mean': array, 'std': array}} where
    mean/std are taken over axis 0 of coef, or {} if the output state
    cannot be loaded
    '''
    # NOTE(review): join() raises TypeError if scaling_method is None -- confirm
    # how the dump is named for unscaled experiments
    output_statefile = pathjoin(self.exp_dir,
                                '_'.join([self.exp_name, self.scaling_method,
                                          self.pred_mode, 'output.pkl']))
    try:
        output_state = jlload(output_statefile)
    except Exception:
        # not a bare except: Ctrl-C / SystemExit must still propagate
        print('Error: unable to load output state files in exp_dir=%s'%self.exp_dir)
        return {}

    # result dict; the original never created it and also reused the name
    # for the dense xgb array, so the final assignment could not work
    coef = {}

    # width of the dense xgb matrix: start from the feature count stored in
    # the dump (if any) and widen if a model reports a larger index
    n_feat = output_state.get('model_features', 0)

    for model_id in output_state['model_eval']:
        models = output_state[model_id]['models']
        coef_fn = self.model_params[model_id][-1]
        weights = [coef_fn(model) for model in models]

        if model_id == 'xgb':
            for w in weights:
                if w:
                    # largest index + 1, not max(int, list-of-keys)
                    n_feat = max(n_feat, max(w.keys())+1)
            dense = zeros([len(models), n_feat])
            for j, w in enumerate(weights):
                if w:
                    # list(): dict views cannot be used as numpy indices
                    dense[j, list(w.keys())] = list(w.values())
            weights = dense

        weights = asarray(weights)
        coef[model_id] = {'coef': weights,
                          'mean': mean(weights, axis=0),
                          'std': std(weights, axis=0)}
    return coef
def _collect_state(self,input_statefile,output_statefile,
                   output_exists='overwrite'):
    '''
    run _run_cv for an experiment (input_statefile) and serialize the output
    (output_statefile), scales features using scaling_method, and excludes
    unlabeled (y=0) samples

    if output_exists=='overwrite': overwrite existing output dump
       output_exists=='noupdate': read existing output dump and return
                                  without updating
       any other value: read existing output dump (if present) and pass it
                        to _run_cv as the starting output state

    features are scaled *before* unlabeled samples are removed, so the
    scaler is fit on all samples of X_exp

    returns (input_state, output_state); on a configuration error (model
    incompatible with multi-output labels, unknown scaling method, unknown
    prediction mode) an error is printed and the states are returned
    without running cross validation or writing output_statefile
    '''
    input_state = jlload(input_statefile)
    output_state = {}
    if output_exists != 'overwrite' and pathexists(output_statefile):
        print('Loading existing state from', output_statefile)
        output_state = jlload(output_statefile)
        if output_exists == 'noupdate':
            return input_state, output_state

    X = input_state['X_exp'].copy()
    y = input_state['y_exp'].copy()

    # y is multi-output unless it is 1-d or a single row/column vector
    multi_output = y.size != max(y.shape)
    if multi_output:
        # error out if incompatible models in model_eval
        for model in self.model_eval:
            if model in model_nomulti:
                print('Error: model %s not compatible with multi-output labels'%model)
                return input_state,output_state

    if self.scaling_method=='Normalize':
        scale_fn = Normalizer(norm='l2').fit_transform
    elif self.scaling_method=='MinMax':
        scale_fn = MinMaxScaler().fit_transform
    elif self.scaling_method=='Standard':
        scale_fn = StandardScaler().fit_transform
    elif self.scaling_method is None:
        scale_fn = lambda X: X
    else:
        print('Error: unknown scaling method "%s"'%self.scaling_method)
        return input_state,output_state

    print('Scaling features using method "%s"'%self.scaling_method)
    X = scale_fn(X)

    # remove unlabeled samples
    if multi_output:
        labmask = (y!=0).any(axis=1)
        y = y[labmask,:]
    else:
        # flatten so an (N,1) label vector gives a 1-d row mask; a 2-d mask
        # cannot be used to select rows of X below
        labmask = (y!=0).ravel()
        y = y[labmask].ravel()
    X = X[labmask,:]

    # make sure X,y are valid after scaling/masking operations
    check_X_y(X,y,multi_output=multi_output)

    # get number of *labeled* samples
    N = len(y)
    if self.cv_id == 'loo':
        cv = LeaveOneOut(N)
    elif self.pred_mode == 'clf':
        cv = StratifiedKFold(y,n_folds=self.train_folds,
                             random_state=train_state)
    elif self.pred_mode == 'reg':
        cv = KFold(n=N,n_folds=self.train_folds,random_state=train_state,
                   shuffle=True)
    else:
        # without this branch cv stayed unbound and _run_cv was reached
        # with an UnboundLocalError
        print('Error: unknown prediction mode "%s"'%self.pred_mode)
        return input_state,output_state

    output_state = self._run_cv(X,y,cv,output_state)
    output_state.update({'cv':cv,'cv_id':self.cv_id,'labmask':labmask,
                         'scaling_method':self.scaling_method,
                         'model_eval':self.model_eval,'model_classes':2,
                         'model_features':X.shape[1]})

    jldump(output_state,output_statefile,compress=jlcompress,
           cache_size=jlcache)
    return input_state, output_state