Ejemplo n.º 1
0
    def train_model_agg( self, bags, y, cv_split_bags=None, sample_weight=None, param_search=True ):
        """Train the bag-level aggregation model on quantile features.

        For every held-out bag, the instance-level predictions returned by the
        parent class's ``predict`` are summarised by a fixed set of
        percentiles.  A ``LinearClassifier(classifier='svm')`` is then fit on
        those per-bag summaries.

        Args:
            bags: Sequence of bags.  Each bag must support ``len`` and is
                passed unchanged to the parent ``predict``.
            y: Bag labels, indexable by bag index.
            cv_split_bags: Optional list of ``(train_idx, test_idx)`` pairs.
                When ``None``, a shuffled 5-fold ``StratifiedKFold`` split of
                ``bags``/``y`` is generated.  The position of a pair in the
                list is passed to the parent ``predict`` as ``cv``
                (presumably selecting the instance model trained for that
                fold -- confirm against the parent class).
            sample_weight: Optional per-bag weights, indexed like ``y``.
            param_search: Forwarded to ``LinearClassifier.fit``.

        Side effects:
            Sets ``self.C_agg``, ``self.gamma_agg`` and ``self._model_agg``
            (a ``(model, quantiles)`` tuple).
        """

        # Choose the percentiles.  With few instances per bag (rounded mean
        # bag size <= nq) use that many evenly spaced percentiles over
        # [0, 100]; otherwise use the centres of nq equal-width bins.
        ninst = int(np.round(sum(len(bag) for bag in bags) / float(len(bags))))
        nq = self.quantiles if self.quantiles is not None else 16
        if ninst <= nq:
            quantiles = np.linspace(0, 100, ninst)
        else:
            half_bin = 100.0 / nq / 2
            quantiles = np.linspace(half_bin, 100 - half_bin, nq)

        if cv_split_bags is None:
            # No split supplied: build a stratified train/test split.
            skf = sklearn.model_selection.StratifiedKFold(n_splits=5, shuffle=True)
            cv_split_bags = list(skf.split(bags, y))

        # Compute the quantile features of every held-out bag.  Iterate over
        # the folds actually present instead of assuming exactly five.
        p = []
        test_y = []
        test_sw = []
        for f, (_train_idx, test_idx) in enumerate(cv_split_bags):
            for i in test_idx:
                pi = super(SIL, self).predict(bags[i], cv=f)
                if pi.shape[1] == 2:
                    # Two columns: only column 1 is summarised.
                    q = np.percentile(pi[:, 1], quantiles)
                else:
                    # Otherwise concatenate the percentiles of every column.
                    q = np.hstack([np.percentile(pi[:, c], quantiles)
                                   for c in range(pi.shape[1])])
                p.append(q)
                test_y.append(y[i])
                if sample_weight is not None:
                    # Rows of p follow fold order, not bag order, so the
                    # weights must be gathered in the same order as test_y.
                    test_sw.append(sample_weight[i])
        p = np.vstack(p)
        test_y = np.array(test_y)
        agg_weight = np.array(test_sw) if sample_weight is not None else None

        # Train the aggregation model.
        # NOTE(review): the hyper-parameter search below runs regardless of
        # the ``param_search`` flag, and gamma_agg is stored on self but not
        # set on the model -- confirm both are intended.
        model_agg = LinearClassifier(classifier='svm')
        self.C_agg, self.gamma_agg = model_agg.param_search(
            p, test_y, sample_weight=agg_weight, quick=False)
        model_agg.C = self.C_agg
        model_agg.fit(p, test_y, sample_weight=agg_weight,
                      param_search=param_search, calibrate=self._calibrate)
        self._model_agg = (model_agg, quantiles)
Ejemplo n.º 2
0
                y_train = labels[idx_train, c]
                y_sw = y_train + len(label_names[c]) * labels_sw[idx_train]

                uniq = np.unique(y_sw).tolist()
                counts = np.array([(y_sw == l).sum() for l in uniq])
                counts = counts.sum().astype(float) / (counts * len(counts))
                sw = np.array([counts[uniq.index(y)] for y in y_sw])
            else:
                sw = None

            if mi_type is None:
                model = LinearClassifier(n_jobs=n_jobs, **options)
                model.fit(X_train,
                          y_train,
                          calibrate=calibrate,
                          param_search=True,
                          sample_weight=sw)
            elif mi_type in ['median', 'max']:
                model = SIL(n_jobs=n_jobs, **options)
                model.fit(X_train,
                          y_train,
                          calibrate=calibrate,
                          param_search=True,
                          sample_weight=sw)
            elif mi_type == 'quantile':
                if quantiles is not None:
                    options['quantiles'] = int(quantiles)
                model = SIL(n_jobs=n_jobs, **options)
                model.fit(X_train,
                          y_train,