def train_model_agg(self, bags, y, cv_split_bags=None, sample_weight=None, param_search=True):
    """Train the bag-level aggregation model on quantiles of instance predictions.

    Each bag is scored with ``super(SIL, self).predict(bag, cv=fold)`` for the
    fold in which the bag appears in the test indices.  The per-instance
    predictions are summarised by their percentiles, and those summaries are
    used as the feature rows of an SVM ``LinearClassifier``.

    Parameters
    ----------
    bags : sequence
        Bags of instances; ``len(bag)`` is used as the instance count.
    y : sequence
        One label per bag, indexed like ``bags``.
    cv_split_bags : sequence of (train_idx, test_idx), optional
        Bag-level folds.  When omitted, a shuffled 5-fold stratified split
        is generated.  Exactly the first five entries are read, so a shorter
        sequence raises ``IndexError``.
    sample_weight : array-like, optional
        One weight per bag, indexed like ``bags``.  It is reordered
        internally to match the held-out order of the feature rows.
    param_search : bool
        Forwarded to ``LinearClassifier.fit``.  The explicit
        ``param_search`` call below runs regardless of this flag.

    Side effects
    ------------
    Sets ``self.C_agg``, ``self.gamma_agg`` and ``self._model_agg`` (a
    ``(model, quantiles)`` tuple).
    """
    # NOTE(review): the fold count is fixed at 5, presumably to match the
    # number of cross-validation models the parent class holds -- confirm
    # before generalising.
    n_folds = 5

    # Figure out the number of quantiles and where to set them: use the
    # requested count (default 16) unless the average bag is smaller.
    mean_bag_size = int(np.round(sum(len(bag) for bag in bags) / float(len(bags))))
    nq = self.quantiles if self.quantiles is not None else 16
    if mean_bag_size <= nq:
        quantiles = np.linspace(0, 100, mean_bag_size)
    else:
        # Centre each of the nq quantiles in its own 100/nq-wide bin.
        half_step = 100.0 / nq / 2
        quantiles = np.linspace(half_step, 100 - half_step, nq)

    if cv_split_bags is None:
        # train/test split
        skf = sklearn.model_selection.StratifiedKFold(n_splits=n_folds, shuffle=True)
        cv_split_bags = list(skf.split(bags, y))

    # Compute the quantile function of every held-out bag, remembering the
    # bag index behind each feature row.
    features = []
    held_out = []
    for fold in range(n_folds):
        _, test_idx = cv_split_bags[fold]
        for i in test_idx:
            pi = super(SIL, self).predict(bags[i], cv=fold)
            if pi.shape[1] == 2:
                # Two columns: only column 1 is summarised.
                q = np.percentile(pi[:, 1], quantiles)
            else:
                q = np.hstack(
                    [np.percentile(pi[:, c], quantiles) for c in range(pi.shape[1])]
                )
            features.append(q)
            held_out.append(i)

    p = np.vstack(features)
    test_y = np.array([y[i] for i in held_out])

    # The rows of p follow the held-out order, not the original bag order,
    # so the per-bag weights must be permuted the same way; otherwise each
    # weight would be applied to a different bag's feature row.
    if sample_weight is not None:
        sample_weight = np.asarray(sample_weight)[np.asarray(held_out, dtype=np.intp)]

    # train model
    model_agg = LinearClassifier(classifier='svm')
    self.C_agg, self.gamma_agg = model_agg.param_search(
        p, test_y, sample_weight=sample_weight, quick=False
    )
    model_agg.C = self.C_agg
    model_agg.fit(
        p,
        test_y,
        sample_weight=sample_weight,
        param_search=param_search,
        calibrate=self._calibrate,
    )
    self._model_agg = (model_agg, quantiles)
y_train = labels[idx_train, c] y_sw = y_train + len(label_names[c]) * labels_sw[idx_train] uniq = np.unique(y_sw).tolist() counts = np.array([(y_sw == l).sum() for l in uniq]) counts = counts.sum().astype(float) / (counts * len(counts)) sw = np.array([counts[uniq.index(y)] for y in y_sw]) else: sw = None if mi_type is None: model = LinearClassifier(n_jobs=n_jobs, **options) model.fit(X_train, y_train, calibrate=calibrate, param_search=True, sample_weight=sw) elif mi_type in ['median', 'max']: model = SIL(n_jobs=n_jobs, **options) model.fit(X_train, y_train, calibrate=calibrate, param_search=True, sample_weight=sw) elif mi_type == 'quantile': if quantiles is not None: options['quantiles'] = int(quantiles) model = SIL(n_jobs=n_jobs, **options) model.fit(X_train, y_train,