def __init__(self, on=None, ixname='ix', source_suffix='source',
             target_suffix='target', scoresuffix='score', **kwargs):
    """
    Store the join column and index naming, and derive the output column name.

    Args:
        on (str): name of the column on which to do the join. When None,
            the output column is named after ``scoresuffix`` alone.
        ixname (str): name of the index, default 'ix'
        source_suffix (str): suffix for the source side, default 'source'
        target_suffix (str): suffix for the target side, default 'target'
        scoresuffix (str): suffix attached to the ``on`` column name to
            build the output column name, default 'score'
        **kwargs: accepted but ignored by this constructor
    """
    TransformerMixin.__init__(self)
    self.ixname = ixname
    self.source_suffix = source_suffix
    self.target_suffix = target_suffix
    # concatixnames derives the source / target / pair index names.
    self.ixnamesource, self.ixnametarget, self.ixnamepairs = concatixnames(
        ixname=self.ixname,
        source_suffix=self.source_suffix,
        target_suffix=self.target_suffix)
    self.on = on
    self.scoresuffix = scoresuffix
    # Output column is '<on>_<scoresuffix>', or '<scoresuffix>' without `on`.
    if self.on is None:
        self.outcol = self.scoresuffix
    else:
        self.outcol = self.on + '_' + self.scoresuffix
    self.fitted = False
def __init__(self, transformer=None, estimator=None, normalize=True,
             keep_tsne_outputs=False, **kwargs):
    """
    :param transformer: object exposing ``fit_transform``;
        ``TSNE()`` is instantiated when None
    :param estimator: object exposing ``predict``;
        ``KNeighborsRegressor()`` is instantiated when None
    :param normalize: stored in ``self.normalize``
    :param keep_tsne_outputs: stored in ``self.keep_tsne_outputs``
    :param kwargs: forwarded to ``set_params`` when not empty
    :raises AttributeError: if the transformer has no ``fit_transform``
        or the estimator has no ``predict``
    """
    TransformerMixin.__init__(self)
    BaseEstimator.__init__(self)
    regressor = KNeighborsRegressor() if estimator is None else estimator
    embedder = TSNE() if transformer is None else transformer
    self.estimator = regressor
    self.transformer = embedder
    self.keep_tsne_outputs = keep_tsne_outputs
    if not hasattr(embedder, "fit_transform"):
        raise AttributeError(
            "transformer {} does not have a 'fit_transform' "
            "method.".format(type(embedder)))
    if not hasattr(regressor, "predict"):
        raise AttributeError("estimator {} does not have a 'predict' "
                             "method.".format(type(regressor)))
    self.normalize = normalize
    if kwargs:
        self.set_params(**kwargs)
def __init__(self, transformer=None, estimator=None, normalize=True,
             keep_tsne_outputs=False, **kwargs):
    """
    :param transformer: `TSNE` by default, must expose ``fit_transform``
    :param estimator: `KNeighborsRegressor` by default,
        must expose ``predict``
    :param normalize: normalizes the outputs, centers and normalizes
        the output of the *t-SNE* and applies that same normalization
        to the prediction of the estimator
    :param keep_tsne_outputs: if True, keep raw outputs of
        :epkg:`TSNE` is stored in member *tsne_outputs_*
    :param kwargs: sent to :meth:`set_params <mlinsights.mlmodel.
        tsne_transformer.PredictableTSNE.set_params>`, see its
        documentation to understand how to specify parameters
    :raises AttributeError: if the transformer has no ``fit_transform``
        or the estimator has no ``predict``
    """
    TransformerMixin.__init__(self)
    BaseEstimator.__init__(self)
    if estimator is None:
        estimator = KNeighborsRegressor()
    if transformer is None:
        transformer = TSNE()
    self.estimator = estimator
    self.transformer = transformer
    self.keep_tsne_outputs = keep_tsne_outputs
    if not hasattr(transformer, "fit_transform"):
        raise AttributeError(
            "Transformer {} does not have a 'fit_transform' "
            "method.".format(type(transformer)))
    if not hasattr(estimator, "predict"):
        raise AttributeError(
            "Estimator {} does not have a 'predict' method.".format(
                type(estimator)))
    self.normalize = normalize
    # Extra keyword arguments are applied last, through set_params.
    if kwargs:
        self.set_params(**kwargs)
def __init__(self, estimator, method=None, copy_estimator=True):
    """
    @param      estimator           estimator to wrap in a transformer,
                                    stored as-is by the constructor
    @param      method              name of the estimator method to call;
                                    if None, the first one found among
                                    *transform*, *predict_proba*,
                                    *decision_function*, *predict*
                                    is selected
    @param      copy_estimator      stored in ``self.copy_estimator``
                                    (documented as: copy the model
                                    instead of taking a reference)
    """
    TransformerMixin.__init__(self)
    BaseEstimator.__init__(self)
    self.estimator = estimator
    self.copy_estimator = copy_estimator
    if method is None:
        # Candidates are probed in priority order; the first hit wins.
        for candidate in ("transform", "predict_proba",
                          "decision_function", "predict"):
            if hasattr(estimator, candidate):
                method = candidate
                break
        else:
            raise AttributeError(
                "Cannot find a method transform, predict_proba, decision_function, predict in object {}"
                .format(type(estimator)))
    if not hasattr(estimator, method):
        raise AttributeError("Cannot find method '{}' in object {}".format(
            method, type(estimator)))
    self.method = method
def __init__(self, clustermixin=None, n_simple=10, n_hard=10, ixname='ix',
             source_suffix='source', target_suffix='target'):
    """
    Build the clustering and question helpers used by this transformer.

    Args:
        clustermixin (ClusterMixin): if None, a ``KBinsCluster`` with
            10 clusters is used
        n_simple (int): number of simple questions per cluster
        n_hard (int): number of hard questions per cluster
        ixname (str): default 'ix'
        source_suffix (str): default 'source'
        target_suffix (str): default 'target'
    """
    TransformerMixin.__init__(self)
    self.ixname = ixname
    self.source_suffix = source_suffix
    self.target_suffix = target_suffix
    # concatixnames derives the source / target / pair index names.
    self.ixnamesource, self.ixnametarget, self.ixnamepairs = concatixnames(
        ixname=self.ixname,
        source_suffix=self.source_suffix,
        target_suffix=self.target_suffix)
    if clustermixin is None:
        clustermixin = KBinsCluster(n_clusters=10)
    self._clustermixin = clustermixin
    self._simplequestions = SimpleQuestions(n_questions=n_simple)
    self._hardquestions = HardQuestions(n_questions=n_hard)
    self._clusterclassifier = ClusterClassifier(
        ixname=self.ixname,
        source_suffix=self.source_suffix,
        target_suffix=self.target_suffix)
def __init__(self, columns=None, remove=None, skip_errors=False,
             single=False, fLOG=None):
    """
    constructor

    @param      columns         columns selection; a value that is neither
                                a list nor None is wrapped into a
                                one-element list
    @param      remove          modalities to remove
    @param      skip_errors     skip when a new category appears
    @param      single          use a single column per category,
                                do not multiply them for each value
    @param      fLOG            logging function

    The logging function displays a message when a new dense and big
    matrix is created when it should be sparse. A sparse matrix should
    be allocated instead.
    """
    BaseEstimator.__init__(self)
    TransformerMixin.__init__(self)
    if columns is None or isinstance(columns, list):
        self._p_columns = columns
    else:
        self._p_columns = [columns]
    self._p_skip_errors = skip_errors
    self._p_remove = remove
    self._p_single = single
    self.fLOG = fLOG
def __init__(self, onnx_bytes, output_name=None, enforce_float32=True):
    """
    :param onnx_bytes: serialized model, must be an instance of ``bytes``
    :param output_name: stored in ``self.output_name``
    :param enforce_float32: stored in ``self.enforce_float32``
    :raises TypeError: if *onnx_bytes* is not ``bytes``
    """
    BaseEstimator.__init__(self)
    TransformerMixin.__init__(self)
    self.onnx_bytes = onnx_bytes
    self.output_name = output_name
    self.enforce_float32 = enforce_float32
    is_bytes = isinstance(onnx_bytes, bytes)
    if not is_bytes:
        raise TypeError("onnx_bytes must be bytes to be pickled.")
def __init__(self, kind='poly', poly_degree=2, poly_interaction_only=False,
             poly_include_bias=True):
    """
    Store the configuration without validating it.

    :param kind: stored in ``self.kind``, default 'poly'
    :param poly_degree: stored in ``self.poly_degree``
    :param poly_interaction_only: stored in ``self.poly_interaction_only``
    :param poly_include_bias: stored in ``self.poly_include_bias``
    """
    for base in (BaseEstimator, TransformerMixin):
        base.__init__(self)
    self.kind = kind
    self.poly_degree = poly_degree
    self.poly_include_bias = poly_include_bias
    self.poly_interaction_only = poly_interaction_only
def __init__(self, vocab, merges, padding_length=-1, opset=None):
    """
    :param vocab: stored in ``self.vocab``
    :param merges: stored in ``self.merges``
    :param padding_length: stored in ``self.padding_length``, default -1
    :param opset: stored in ``self.opset``
    :raises ImportError: if ``get_library_path`` is None, which the error
        message attributes to *onnxruntime_extensions* not being installed
    """
    BaseEstimator.__init__(self)
    TransformerMixin.__init__(self)
    self.vocab = vocab
    self.merges = merges
    self.padding_length = padding_length
    self.opset = opset
    extensions_missing = get_library_path is None
    if extensions_missing:
        raise ImportError("onnxruntime_extensions is not installed.")
def __init__(self, n_questions=10):
    """
    Args:
        n_questions (int): number of questions to be asked for each cluster
    """
    TransformerMixin.__init__(self)
    self.n_questions = n_questions
    # Both start unset; nothing in the constructor fills them in.
    self.n_clusters = self.clusters = None
def __init__(self, scaler_model, clf_model, hmm_model):
    """
    Store the three models and a discretizer with hand-set bin edges.

    :param scaler_model: stored in ``self.scaler_model_``
    :param clf_model: stored in ``self.clf_model_``
    :param hmm_model: stored in ``self.hmm_model_``
    """
    # The edges are written directly onto the discretizer, so it is
    # never fitted here.
    edges = np.array([-np.inf, 0.1, 0.3, 0.5, 0.7, 0.9, np.inf])
    discretizer = KBinsDiscretizer(encode='ordinal')
    # NOTE(review): n_bins_ receives the number of edges (7), not the
    # number of intervals between them (6) - confirm this is intended.
    discretizer.n_bins_ = np.array([edges.shape[0]])
    discretizer.bin_edges_ = edges.reshape(1, -1)

    TransformerMixin.__init__(self)
    BaseEstimator.__init__(self)
    self.scaler_model_ = scaler_model
    self.clf_model_ = clf_model
    self.hmm_model_ = hmm_model
    self.bins_discretizer_ = discretizer
def __init__(self, name, fct, kwargs):
    """
    @param      name        function name, stored in ``self.name_fct``
    @param      fct         python function, stored in ``self._fct``
    @param      kwargs      function parameters, stored in ``self.kwargs``
    """
    for base in (BaseEstimator, TransformerMixin):
        base.__init__(self)
    self.name_fct, self._fct, self.kwargs = name, fct, kwargs
def __init__(self, onnx_bytes, output_name=None, enforce_float32=True,
             runtime='python', change_batch_size=None, reshape=False):
    """
    :param onnx_bytes: serialized model, or any object exposing
        ``SerializeToString`` (it is then serialized by calling it)
    :param output_name: stored in ``self.output_name``
    :param enforce_float32: stored in ``self.enforce_float32``
    :param runtime: stored in ``self.runtime``, default 'python'
    :param change_batch_size: stored in ``self.change_batch_size``
    :param reshape: stored in ``self.reshape``
    """
    BaseEstimator.__init__(self)
    TransformerMixin.__init__(self)
    if hasattr(onnx_bytes, 'SerializeToString'):
        serialized = onnx_bytes.SerializeToString()
    else:
        serialized = onnx_bytes
    self.onnx_bytes = serialized
    self.output_name = output_name
    self.enforce_float32 = enforce_float32
    self.runtime = runtime
    self.change_batch_size = change_batch_size
    self.reshape = reshape
def __init__(self, onnx_bytes, output_name=None):
    """
    :param onnx_bytes: serialized model, must be an instance of ``bytes``
    :param output_name: requested output name or None to request all
        and have method *transform* store all of them in a dataframe
    :raises TypeError: if *onnx_bytes* is not ``bytes``
    """
    BaseEstimator.__init__(self)
    TransformerMixin.__init__(self)
    self.onnx_bytes = onnx_bytes
    self.output_name = output_name
    if isinstance(onnx_bytes, bytes):
        return
    raise TypeError("onnx_bytes must be bytes to be pickled.")
def __init__(self,
             species: list = None,
             rcut: float = 6,
             nmax: int = 6,
             lmax: int = 8,
             rbf: str = "gto",
             sigma: float = 0.125,
             average: str = "inner",
             periodic: bool = True,
             convert: bool = True):
    r"""
    Initialize the descriptor settings.

    Parameters
    ----------
    species : list, optional
        list of elements. ``None`` (the default) is stored as a new
        empty list, so instances never share one default list.
    rcut : float
        A cutoff for local region in angstroms. Should be bigger than 1
        angstrom
    nmax : int
        The number of radial basis functions.
    lmax : int
        The maximum degree of spherical harmonics.
    rbf : str
        The radial basis functions to use. The available options are:

        * "gto": Spherical gaussian type orbitals defined as
          :math:`g_{nl}(r) = \sum_{n'=1}^{n_\mathrm{max}}\,\beta_{nn'l} r^l e^{-\alpha_{n'l}r^2}`
        * "polynomial": Polynomial basis defined as
          :math:`g_{n}(r) = \sum_{n'=1}^{n_\mathrm{max}}\,\beta_{nn'} (r-r_\mathrm{cut})^{n'+2}`
    sigma : float
        The standard deviation of the gaussians used to expand the
        atomic density.
    average : str
        The averaging mode over the centers of interest. Valid options
        are:

        * "off": No averaging.
        * "inner": Averaging over sites before summing up the magnetic
          quantum numbers:
          :math:`p_{nn'l}^{Z_1,Z_2} \sim \sum_m (\frac{1}{n} \sum_i c_{nlm}^{i, Z_1})^{*} (\frac{1}{n} \sum_i c_{n'lm}^{i, Z_2})`
        * "outer": Averaging over the power spectrum of different sites:
          :math:`p_{nn'l}^{Z_1,Z_2} \sim \frac{1}{n} \sum_i \sum_m (c_{nlm}^{i, Z_1})^{*} (c_{n'lm}^{i, Z_2})`
    periodic : bool
        Set to true if you want the descriptor output to respect the
        periodicity of the atomic systems (see the pbc-parameter in the
        constructor of ase.Atoms).
    convert : bool
        If true convert pymatgen structures to ase.atoms
    """
    TransformerMixin.__init__(self)
    # A fresh list per instance replaces the former shared `[]` default.
    self.species = [] if species is None else species
    self.rcut = rcut
    self.nmax = nmax
    self.lmax = lmax
    # No descriptor object is built in the constructor.
    self.soap: Any = None
    self.rbf = rbf
    self.sigma = sigma
    self.average = average
    self.periodic = periodic
    self.convert = convert
def __init__(self, onnx_bytes, output_name=None, enforce_float32=True,
             runtime='onnxruntime1'):
    """
    :param onnx_bytes: serialized model, or any object exposing
        ``SerializeToString`` (it is then serialized by calling it)
    :param output_name: stored in ``self.output_name``
    :param enforce_float32: stored in ``self.enforce_float32``
    :param runtime: stored in ``self.runtime``, default 'onnxruntime1'
    """
    BaseEstimator.__init__(self)
    TransformerMixin.__init__(self)
    if hasattr(onnx_bytes, 'SerializeToString'):
        onnx_bytes = onnx_bytes.SerializeToString()
    self.onnx_bytes = onnx_bytes
    self.output_name = output_name
    self.enforce_float32 = enforce_float32
    self.runtime = runtime
def __init__(self, on_source='source', on_target='target', compfunc=None,
             *args, **kwargs):
    """
    Store the two column names and the comparison function.

    Args:
        on_source (str): stored in ``self.left``, default 'source'
        on_target (str): stored in ``self.right``, default 'target'
        compfunc (callable): comparison function; required even though
            the signature defaults it to None
        *args: accepted but ignored by this constructor
        **kwargs: accepted but ignored by this constructor

    Raises:
        ValueError: if ``compfunc`` is None
        TypeError: if ``compfunc`` is not callable
    """
    TransformerMixin.__init__(self)
    self.left = on_source
    self.right = on_target
    if compfunc is None:
        raise ValueError('comparison function not provided with function',
                         compfunc)
    # Explicit raise: an `assert` would be stripped under `python -O`.
    if not callable(compfunc):
        raise TypeError(
            'compfunc must be callable, got {}'.format(type(compfunc)))
    self.compfunc = compfunc
def __init__(self, ixname='ix', source_suffix='source',
             target_suffix='target'):
    """
    Args:
        ixname (str): name of the index, default 'ix'
        source_suffix (str): default 'source'
        target_suffix (str): default 'target'
    """
    TransformerMixin.__init__(self)
    self.ixname = ixname
    self.source_suffix = source_suffix
    self.target_suffix = target_suffix
    names = concatixnames(
        ixname=self.ixname,
        source_suffix=self.source_suffix,
        target_suffix=self.target_suffix)
    self.ixnamesource, self.ixnametarget, self.ixnamepairs = names
    # NOTE(review): this stores the pd.Index class itself, not an
    # instance - confirm that is intended.
    self.index = pd.Index
    self.dfnum = pd.DataFrame()
    self.dfix = pd.DataFrame()
    self.num = None
def __init__(self, ixname='ix', source_suffix='source',
             target_suffix='target', usecols=None, **kwargs):
    """
    Args:
        ixname (str): name of the index, default 'ix'
        source_suffix (str): default 'source'
        target_suffix (str): default 'target'
        usecols: stored in ``self.usecols``
        **kwargs: accepted but ignored by this constructor
    """
    TransformerMixin.__init__(self)
    self.ixname = ixname
    self.source_suffix = source_suffix
    self.target_suffix = target_suffix
    names = concatixnames(
        ixname=self.ixname,
        source_suffix=self.source_suffix,
        target_suffix=self.target_suffix)
    self.ixnamesource, self.ixnametarget, self.ixnamepairs = names
    self.usecols = usecols
def __init__(self, model, periods=1, freq='30min'):
    """Lags a dataset.

    Lags all features.
    Missing data is dropped for fitting, and replaced with the mean for
    predict.

    :param model: wrapped model, must be a ``BaseEstimator`` instance
    :param periods: Number of timesteps to lag by
    :param freq: stored in ``self.freq``, default '30min'
    :raises TypeError: if *model* is not a ``BaseEstimator`` instance
    """
    # Explicit raise: an `assert` would be stripped under `python -O`.
    if not isinstance(model, BaseEstimator):
        raise TypeError("`model` isn't a scikit-learn model")

    BaseEstimator.__init__(self)
    TransformerMixin.__init__(self)

    self.periods = periods
    self.freq = freq
    self.model = model
def __init__(
    self,
    normalizer=None,
    transformer=None,
    estimator=None,
    normalize=True,
    keep_tsne_outputs=False,
):
    """
    @param      normalizer          None by default, otherwise must
                                    expose ``transform``
    @param      transformer         :epkg:`sklearn:manifold:TSNE`
                                    by default
    @param      estimator           :epkg:`sklearn:neural_network:MLPRegressor`
                                    by default
    @param      normalize           normalizes the outputs, centers and
                                    normalizes the output of the *t-SNE*
                                    and applies that same normalization
                                    to the prediction of the estimator
    @param      keep_tsne_outputs   if True, keep raw outputs of
                                    :epkg:`TSNE` is stored in member
                                    *tsne_outputs_*
    """
    TransformerMixin.__init__(self)
    BaseEstimator.__init__(self)
    regressor = MLPRegressor() if estimator is None else estimator
    embedder = TSNE() if transformer is None else transformer
    self.estimator = regressor
    self.transformer = embedder
    self.normalizer = normalizer
    self.keep_tsne_outputs = keep_tsne_outputs
    # Checked in the same order as before: normalizer, transformer,
    # estimator.
    if not (normalizer is None or hasattr(normalizer, "transform")):
        raise AttributeError(
            "normalizer {} does not have a 'transform' method.".format(
                type(normalizer)))
    if not hasattr(embedder, "fit_transform"):
        raise AttributeError(
            "transformer {} does not have a 'fit_transform' method.".
            format(type(embedder)))
    if not hasattr(regressor, "predict"):
        raise AttributeError(
            "estimator {} does not have a 'predict' method.".format(
                type(regressor)))
    self.normalize = normalize
def __init__(self, ixname='ix', source_suffix='source',
             target_suffix='target'):
    """
    Args:
        ixname (str): name of the index, default 'ix'
        source_suffix (str): default 'source'
        target_suffix (str): default 'target'
    """
    TransformerMixin.__init__(self)
    self.ixname = ixname
    self.source_suffix = source_suffix
    self.target_suffix = target_suffix
    names = concatixnames(
        ixname=self.ixname,
        source_suffix=self.source_suffix,
        target_suffix=self.target_suffix)
    self.ixnamesource, self.ixnametarget, self.ixnamepairs = names
def __init__(
    self,
    elements: List,
    rcut: float = 10.1,
    stepSize: float = 0.1,
    sigma: float = 0.2,
):
    """
    Parameters
    ----------
    elements : list
        list of elements symbols
    rcut : float
        upper bound (exclusive) passed to ``np.arange`` for the bins
    stepSize : float
        spacing passed to ``np.arange`` for the bins
    sigma : float
        stored in ``self.sigma``
    """
    TransformerMixin.__init__(self)
    self.elements = elements
    self.rdf_tup = calc_rdf_tup(elements)
    self.rcut = rcut
    self.stepSize = stepSize
    self.sigma = sigma
    # Bins start at the fixed value 0.1.
    radii = np.arange(0.1, self.rcut, self.stepSize)
    self.binRad = radii
    self.numBins = len(self.binRad)
    self.numPairs = len(self.rdf_tup)
def __init__(self, columns=None, remove=None, skip_errors=False,
             single=False, fLOG=None):
    """
    constructor

    @param      columns         columns selection; a value that is neither
                                a list nor None is wrapped into a
                                one-element list
    @param      remove          modalities to remove
    @param      skip_errors     skip when a new category appears
    @param      single          use a single column per category,
                                do not multiply them for each value
    @param      fLOG            logging function

    The logging function displays a message when a new dense and big
    matrix is created when it should be sparse. A sparse matrix should
    be allocated instead.
    """
    BaseEstimator.__init__(self)
    TransformerMixin.__init__(self)
    keep_as_is = columns is None or isinstance(columns, list)
    self._p_columns = columns if keep_as_is else [columns]
    self._p_skip_errors = skip_errors
    self._p_remove = remove
    self._p_single = single
    self.fLOG = fLOG
def __init__(self, model, nbest_size=1, alpha=0.5, reverse=False,
             add_bos=False, add_eos=False, opset=None):
    """
    :param model: either ``bytes``, stored unchanged in ``self.model_b64``
        (presumably already base64-encoded - TODO confirm), or an object
        whose ``tolist()`` returns a flat list of integers in
        ``range(256)``, which is base64-encoded
    :param nbest_size: stored in ``self.nbest_size``
    :param alpha: stored in ``self.alpha``
    :param reverse: stored in ``self.reverse``
    :param add_bos: stored in ``self.add_bos``
    :param add_eos: stored in ``self.add_eos``
    :param opset: stored in ``self.opset``
    :raises ImportError: if ``get_library_path`` is None, which the error
        message attributes to *onnxruntime_extensions* not being installed
    """
    BaseEstimator.__init__(self)
    TransformerMixin.__init__(self)
    if isinstance(model, bytes):
        self.model_b64 = model
    else:
        # b64encode only accepts bytes-like input; passing the list
        # returned by tolist() raised TypeError.
        self.model_b64 = base64.b64encode(bytes(model.tolist()))
    self.nbest_size = nbest_size
    self.alpha = alpha
    self.reverse = reverse
    self.add_bos = add_bos
    self.add_eos = add_eos
    self.opset = opset
    if get_library_path is None:
        raise ImportError("onnxruntime_extensions is not installed.")
def __init__(self):
    """Run both base-class initializers; no attributes are set here."""
    for base in (BaseEstimator, TransformerMixin):
        base.__init__(self)
def __init__(self, alpha=0.):
    """
    :param alpha: stored in ``self.alpha``, default 0.
    """
    for base in (BaseEstimator, TransformerMixin):
        base.__init__(self)
    self.alpha = alpha
def __init__(self, op_version=TARGET_OPSET):
    """
    :param op_version: stored in ``self.op_version``,
        defaults to ``TARGET_OPSET``
    """
    for base in (BaseEstimator, TransformerMixin):
        base.__init__(self)
    self.op_version = op_version
def __init__(self, thresholds):
    """
    :param thresholds: stored in ``self.thresholds`` without validation
    """
    for base in (BaseEstimator, TransformerMixin):
        base.__init__(self)
    self.thresholds = thresholds
def __init__(self):
    """Run both base-class initializers and pin ``op_version`` to 12."""
    for base in (BaseEstimator, TransformerMixin):
        base.__init__(self)
    self.op_version = 12
def __init__(self, label):
    """
    :param label: stored in ``self.label`` without validation
    """
    TransformerMixin.__init__(self)
    setattr(self, "label", label)