Example #1
    def load(self):
        input_statefile  = pathjoin(self.exp_dir, self.exp_name+'_input.pkl')
        output_statefile = pathjoin(self.exp_dir, self.exp_name+'_output.pkl')

        # load the input state (experiment features/labels) if it was previously dumped
        input_state = {}
        if pathexists(input_statefile):
            print('Loading input state from', input_statefile)
            input_state = jlload(input_statefile)

        # load the output state (cross-validation results) if it was previously dumped
        output_state = {}
        if pathexists(output_statefile):
            print('Loading output state from', output_statefile)
            output_state = jlload(output_statefile)
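
All four snippets call short aliases (`pathjoin`, `pathexists`, `jlload`, `jldump`) and bare NumPy names (`mean`, `std`, `zeros`, `asarray`) that are never defined in the excerpts. A minimal sketch of the module-level imports they appear to assume; the aliasing to `os.path` and `joblib` is an inference, not shown in the source:

    # assumed module-level imports -- the aliases are inferred, not shown in the snippets
    from os.path import join as pathjoin, exists as pathexists
    from joblib import load as jlload, dump as jldump
    from numpy import asarray, zeros, mean, std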
Example #2
    def summarize(self):
        output_statefile = pathjoin(self.exp_dir, self.exp_name+'_output.pkl')
        try:
            output_state = jlload(output_statefile)
        except Exception:
            print('Error: unable to load output state files in exp_dir=%s'%self.exp_dir)
            return {}

        # print the mean/std of each cross-validation score for every evaluated model
        for model_id in output_state['model_eval']:
            print('model: %s'%model_id)
            scores = output_state[model_id]['scores']
            for score_id, score_vals in scores.items():
                print('mean %s: %7.4f (std=%7.4f)'%(score_id, mean(score_vals),
                                                    std(score_vals)))
Example #3
    def model_coef(self):
        output_statefile = pathjoin(self.exp_dir,
                                    '_'.join([self.exp_name, self.scaling_method,
                                              self.pred_mode, 'output.pkl']))
        try:
            output_state = jlload(output_statefile)
        except Exception:
            print('Error: unable to load output state files in exp_dir=%s'%self.exp_dir)
            return {}

        coef = {}
        n_feat = 0
        for model_id in output_state['model_eval']:
            models  = output_state[model_id]['models']
            coef_fn = self.model_params[model_id][-1]

            # collect per-fold coefficients for this model
            model_coef = []
            for i, model in enumerate(models):
                w = coef_fn(model)
                if model_id == 'xgb':
                    # xgb coefficients are assumed to be a sparse {feature index: weight} dict
                    n_feat = max(n_feat, max(w.keys())+1)
                model_coef.append(w)

            if model_id == 'xgb':
                # expand the sparse per-fold dicts into a dense (n_models x n_feat) array
                dense = zeros([len(models), n_feat])
                for j, mi in enumerate(model_coef):
                    dense[j, list(mi.keys())] = list(mi.values())
                model_coef = dense

            model_coef = asarray(model_coef)
            model_mean = mean(model_coef, axis=0)
            model_std  = std(model_coef, axis=0)

            coef[model_id] = {'coef': model_coef,
                              'mean': model_mean,
                              'std':  model_std}

        return coef
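
Taken together, summarize() and model_coef() imply a particular layout for the serialized output_state: a 'model_eval' list of model ids, plus one entry per model id holding per-fold 'scores' and fitted 'models'. A hypothetical sketch of that layout (model ids and values are illustrative only; _collect_state below also adds bookkeeping keys such as 'cv', 'labmask', and 'scaling_method'):

    # hypothetical output_state layout inferred from the accessors above
    output_state = {
        'model_eval': ['xgb', 'ridge'],                  # model ids evaluated (illustrative)
        'xgb': {
            'scores': {'accuracy': [0.81, 0.79, 0.84]},  # score id -> per-fold values
            'models': [],                                # fitted model objects, one per fold
        },
        'ridge': {
            'scores': {'accuracy': [0.77, 0.80, 0.78]},
            'models': [],
        },
    }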
Example #4
    def _collect_state(self,input_statefile,output_statefile,
                       output_exists='overwrite'):
        '''
        Run cross-validation (_run_cv) for an experiment (input_statefile) and
        serialize the output (output_statefile). Features are scaled using
        scaling_method, and unlabeled (y=0) samples are excluded.

        If output_exists=='overwrite': overwrite any existing output dump.
        If output_exists=='noupdate':  load the existing output dump and return
                                       without updating it.
        '''

        input_state  = jlload(input_statefile)
        output_state = {}
        if output_exists != 'overwrite' and pathexists(output_statefile):
            print('Loading existing state from', output_statefile)
            output_state = jlload(output_statefile)
            if output_exists == 'noupdate':
                return input_state, output_state

        X = input_state['X_exp'].copy()
        y = input_state['y_exp'].copy()

        multi_output = y.size != max(y.shape)

        if multi_output:
            # error out if incompatible models in model_eval
            for model in self.model_eval:
                if model in model_nomulti:                    
                    print('Error: model %s not compatible with multi-output labels'%model)
                    return input_state,output_state

        if self.scaling_method=='Normalize':
            scale_fn = Normalizer(norm='l2').fit_transform
        elif self.scaling_method=='MinMax':
            scale_fn = MinMaxScaler().fit_transform
        elif self.scaling_method=='Standard':
            scale_fn = StandardScaler().fit_transform
        elif self.scaling_method is None:
            scale_fn = lambda X: X
        else:
            print('Error: unknown scaling method "%s"'%self.scaling_method)
            return input_state,output_state

        print('Scaling features using method "%s"'%self.scaling_method)
        X = scale_fn(X)

        # remove unlabeled samples
        if multi_output:
            labmask = (y!=0).any(axis=1)
            y = y[labmask,:]
        else:
            labmask = y!=0
            y = y[labmask].ravel()
        X = X[labmask,:]

        # make sure X,y are valid after scaling/masking operations
        check_X_y(X,y,multi_output=multi_output)

        # get number of *labeled* samples
        N = len(y)

        # build the cross-validation splitter (legacy scikit-learn cv API)
        if self.cv_id == 'loo':
            cv = LeaveOneOut(N)
        elif self.pred_mode == 'clf':
            cv = StratifiedKFold(y,n_folds=self.train_folds,random_state=train_state)
        elif self.pred_mode == 'reg':
            cv = KFold(n=N,n_folds=self.train_folds,random_state=train_state,shuffle=True)
        else:
            print('Error: unknown pred_mode "%s"'%self.pred_mode)
            return input_state,output_state

        output_state = self._run_cv(X,y,cv,output_state)
        output_state.update({'cv':cv,'cv_id':self.cv_id,'labmask':labmask,
                             'scaling_method':self.scaling_method,
                             'model_eval':self.model_eval,'model_classes':2,
                             'model_features':X.shape[1]})

        jldump(output_state,output_statefile,compress=jlcompress,
               cache_size=jlcache)

        return input_state, output_state
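
A minimal usage sketch of the overwrite/noupdate behavior described in the docstring; the experiment object `exp` and its attributes are hypothetical stand-ins, not part of the original code:

    # hypothetical caller of _collect_state; `exp` is an assumed experiment instance
    input_statefile  = pathjoin(exp.exp_dir, exp.exp_name+'_input.pkl')
    output_statefile = pathjoin(exp.exp_dir, exp.exp_name+'_output.pkl')

    # recompute the cross-validation results and overwrite any existing dump
    input_state, output_state = exp._collect_state(input_statefile, output_statefile,
                                                   output_exists='overwrite')

    # reuse an existing dump if present, returning it without updating
    input_state, output_state = exp._collect_state(input_statefile, output_statefile,
                                                   output_exists='noupdate')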