예제 #1
0
 def __init__(self, estimator=None, clus=None, **kwargs):
     """
     @param  estimator   :epkg:`sklearn:linear_model:LogisticRegression`
                         by default
     @param  clus        clustering applied on each class,
                         by default k-means with two clusters
     @param  kwargs      sent to :meth:`set_params
                         <mlinsights.mlmodel.classification_kmeans.
                         ClassifierAfterKMeans.set_params>`,
                         see its documentation to understand how to
                         specify parameters
     """
     ClassifierMixin.__init__(self)
     BaseEstimator.__init__(self)
     # Fall back on default models when none is supplied.
     self.estimator = LogisticRegression() if estimator is None else estimator
     self.clus = KMeans(n_clusters=2) if clus is None else clus
     # The clustering model must expose a ``transform`` method.
     if not hasattr(self.clus, "transform"):
         raise AttributeError(  # pragma: no cover
             "clus does not have a transform method.")
     if kwargs:
         self.set_params(**kwargs)
예제 #2
0
    def __init__(self, binner=None, estimator=None, n_jobs=None,
                 random_state=None, verbose=False):
        """
        @param      binner              transformer or predictor which creates the buckets
        @param      estimator           predictor trained on every bucket
        @param      n_jobs              number of parallel jobs (for training and predicting)
        @param      random_state        to pick up random examples when buckets do not
                                        contain enough examples of each class
        @param      verbose             boolean or use ``'tqdm'`` to use :epkg:`tqdm`
                                        to fit the estimators

        *binner* allows the following values:

        - ``'tree'``: the model is :epkg:`sklearn:tree:DecisionTreeClassifier`
        - ``'bins'``: the model is :epkg:`sklearn:preprocessing:KBinsDiscretizer`
        - any instantiated model

        *estimator* allows the following values:

        - ``None``: the model is :epkg:`sklearn:linear_model:LogisticRegression`
        - any instantiated model
        """
        ClassifierMixin.__init__(self)
        # Replace symbolic / missing values by default models.
        if binner is None or binner == 'tree':
            binner = DecisionTreeClassifier(min_samples_leaf=5)
        if estimator is None:
            estimator = LogisticRegression()
        PiecewiseEstimator.__init__(
            self, binner=binner, estimator=estimator,
            n_jobs=n_jobs, verbose=verbose)
        self.random_state = random_state
예제 #3
0
    def __init__(self,
                 connector,
                 pruningclf,
                 sbsmodel,
                 classifier,
                 ixname='ix',
                 source_suffix='source',
                 target_suffix='target',
                 **kwargs):
        """
        Args:
            connector (ConnectorMixin): Connector (Scorer) used to do the calculation
            pruningclf (Explorer): Classifier used to do the pruning
                (0=no match, 1: potential match, 2: sure match)
            sbsmodel (TransformerMixin): Side-by-Side scorer,
                can be FeatureUnion, Pipeline...
            classifier (ClassifierMixin): Classifier used to do the prediction
            ixname (str): name of the index column, ``'ix'`` by default
            source_suffix (str): suffix of the source side, ``'source'`` by default
            target_suffix (str): suffix of the target side, ``'target'`` by default
        """
        ClassifierMixin.__init__(self)
        # Index naming scheme shared by all pair-wise operations.
        self.ixname = ixname
        self.source_suffix = source_suffix
        self.target_suffix = target_suffix
        (self.ixnamesource,
         self.ixnametarget,
         self.ixnamepairs) = concatixnames(
            ixname=self.ixname,
            source_suffix=self.source_suffix,
            target_suffix=self.target_suffix)
        # Pipeline components.
        self.connector = connector
        self.pruningclf = pruningclf
        self.sbsmodel = sbsmodel
        self.classifier = classifier
        self.fitted = False
 def __init__(self, estimator=None, threshold=0.75):
     """
     @param  estimator   binary classifier,
                         ``LogisticRegression(solver='liblinear')`` by default
     @param  threshold   threshold stored on the instance,
                         0.75 by default
     """
     ClassifierMixin.__init__(self)
     BaseEstimator.__init__(self)
     self.estimator = (LogisticRegression(solver='liblinear')
                       if estimator is None else estimator)
     self.threshold = threshold
예제 #5
0
    def __init__(self,
                 ixname='ix',
                 source_suffix='source',
                 target_suffix='target',
                 **kwargs):
        """
        Args:
            ixname (str): name of the index column, ``'ix'`` by default
            source_suffix (str): suffix of the source side, ``'source'`` by default
            target_suffix (str): suffix of the target side, ``'target'`` by default
        """
        ClassifierMixin.__init__(self)
        self.ixname = ixname
        self.source_suffix = source_suffix
        self.target_suffix = target_suffix
        (self.ixnamesource,
         self.ixnametarget,
         self.ixnamepairs) = concatixnames(
            ixname=self.ixname,
            source_suffix=self.source_suffix,
            target_suffix=self.target_suffix)
        # Cluster bookkeeping, filled during fit:
        self.clusters = None      # cluster assignments
        self.n_clusters = None    # number of unique clusters
        self.nomatch = None       # clusters where no match has been found
        self.allmatch = None      # clusters where all elements are positive matches
        self.mixedmatch = None    # clusters with both matches and non-matches
        self.notfound = None      # clusters not found (added to no-match)
        self.fitted = False
예제 #6
0
    def __init__(self,
                 transformer,
                 classifier,
                 ixname='ix',
                 source_suffix='source',
                 target_suffix='target',
                 **kwargs):
        """
        Args:
            transformer (TransformerMixin): transformer stored on the instance
            classifier (ClassifierMixin): classifier stored on the instance
            ixname (str): name of the index column, ``'ix'`` by default
            source_suffix (str): suffix of the source side, ``'source'`` by default
            target_suffix (str): suffix of the target side, ``'target'`` by default
        """
        ClassifierMixin.__init__(self)
        # Index naming scheme for the source/target pairs.
        self.ixname = ixname
        self.source_suffix = source_suffix
        self.target_suffix = target_suffix
        (self.ixnamesource,
         self.ixnametarget,
         self.ixnamepairs) = concatixnames(
            ixname=self.ixname,
            source_suffix=self.source_suffix,
            target_suffix=self.target_suffix)
        self.transformer = transformer
        self.classifier = classifier
        self.fitted = False
예제 #7
0
    def __init__(self,
                 estimator=None,
                 max_depth=20,
                 min_samples_split=2,
                 min_samples_leaf=2,
                 min_weight_fraction_leaf=0.0,
                 fit_improve_algo='auto',
                 p1p2=0.09,
                 gamma=1.,
                 verbose=0):
        """Constructor, stores the hyper-parameters and validates them."""
        ClassifierMixin.__init__(self)
        BaseEstimator.__init__(self)
        # Default model trained at every node: a logistic regression.
        self.estimator = LogisticRegression() if estimator is None else estimator
        # The depth must be a bounded integer.
        if max_depth is None:
            raise ValueError("'max_depth' cannot be None.")
        if max_depth > 1024:
            raise ValueError("'max_depth' must be <= 1024.")
        self.max_depth = max_depth
        self.min_samples_split = min_samples_split
        self.min_samples_leaf = min_samples_leaf
        self.min_weight_fraction_leaf = min_weight_fraction_leaf
        self.fit_improve_algo = fit_improve_algo
        self.p1p2 = p1p2
        self.gamma = gamma
        self.verbose = verbose
        # Validate the chosen improvement algorithm against the class list.
        allowed = DecisionTreeLogisticRegression._fit_improve_algo_values
        if self.fit_improve_algo not in allowed:
            raise ValueError("fit_improve_algo='{}' not in {}".format(
                self.fit_improve_algo, allowed))
예제 #8
0
 def __init__(self,
              classifier,
              ixname='ix',
              source_suffix='source',
              target_suffix='target',
              **kwargs):
     """
     This is a wrapper around a classifier that allows it to train on
     partial data where X and y do not have the same index (because of
     pruning steps, ...). It will train (fit) the classifier on the
     common index.

     Args:
         classifier (ClassifierMixin): Classifier to use. Should be the
             output of the pipeline
         ixname (str): name of the index column, ``'ix'`` by default
         source_suffix (str): suffix of the source side, ``'source'`` by default
         target_suffix (str): suffix of the target side, ``'target'`` by default
     """
     ClassifierMixin.__init__(self)
     # Index naming scheme for the source/target pairs.
     self.ixname = ixname
     self.source_suffix = source_suffix
     self.target_suffix = target_suffix
     (self.ixnamesource,
      self.ixnametarget,
      self.ixnamepairs) = concatixnames(
         ixname=self.ixname,
         source_suffix=self.source_suffix,
         target_suffix=self.target_suffix)
     self.classifier = classifier
     self.fitted = False
예제 #9
0
	def __init__(self):
		"""Build the list of base classifiers combined by this model."""
		ClassifierMixin.__init__(self)
		# One instance of each wrapped model family.
		self.clasificadores = [
			RandomForest(), Boosting(), Gradient(), SVM(), SVM2()]
예제 #10
0
 def __init__(self):
     """Initialize the scikit-learn mixins; this estimator has no parameters."""
     BaseEstimator.__init__(self)
     ClassifierMixin.__init__(self)
예제 #11
0
 def __init__(self, penalty="l1"):
     """
     @param  penalty     regularization penalty forwarded to
                         :epkg:`sklearn:linear_model:LogisticRegression`,
                         ``'l1'`` by default
     """
     ClassifierMixin.__init__(self)
     BaseEstimator.__init__(self)
     self.penalty = penalty
     # liblinear supports both 'l1' and 'l2' penalties.
     self.estimator = LogisticRegression(
         penalty=self.penalty, solver="liblinear")
 def __init__(self, base_estimator):
     """
     @param  base_estimator  estimator wrapped by this classifier
     """
     BaseEstimator.__init__(self)
     ClassifierMixin.__init__(self)
     self.base_estimator = base_estimator
예제 #13
0
 def __init__(self, func):
     """
     @param  func    callable stored on the instance

     Raises:
         TypeError: if *func* is not callable.
     """
     ClassifierMixin.__init__(self)
     # ``assert`` would be silently stripped under ``python -O``;
     # raise an explicit error instead, and do it before storing.
     if not callable(func):
         raise TypeError(
             "func must be callable, got {}.".format(type(func)))
     self.func = func
 def __init__(self, penalty='l1'):
     """
     @param  penalty     regularization penalty forwarded to
                         :epkg:`sklearn:linear_model:LogisticRegression`,
                         ``'l1'`` by default

     The solver is pinned to ``'liblinear'``: scikit-learn's default
     solver (``lbfgs``) does not support the default ``'l1'`` penalty
     and would raise at fit time.
     """
     BaseEstimator.__init__(self)
     ClassifierMixin.__init__(self)
     self.penalty = penalty
     self.estimator = LogisticRegression(penalty=self.penalty,
                                         solver='liblinear')