def predict(self, dataset):
    """
    predicts with the wrapped model on features extracted from dataset

    :param dataset: dataset to extract features from
    :return: result of self.model.predict on the extracted features (also stored in self.preds)
    """
    log.debug("getting features...")
    features = tools.get_features(dataset)

    log.debug("predicting with model...")
    predictions = self.model.predict(features)

    # keep the latest predictions on the instance as well as returning them
    self.preds = predictions
    return self.preds
def transform(self, dataset, drop_categorical=True, return_df=False):
    """
    computes the feature representation of a dataset via tools.get_features

    :param dataset: dataset whose features are extracted
    :param drop_categorical: if true, categorical features are dropped
    :param return_df: if true, the result is a pandas dataframe, otherwise a numpy array of values
    :return: features for each sample in dataset, as produced by tools.get_features
    """
    # both switches are forwarded unchanged to the feature extractor
    feature_options = {
        "drop_categorical": drop_categorical,
        "return_df": return_df,
    }
    return tools.get_features(dataset, **feature_options)
def predict(self, dataset):
    """
    predicts with the trained LightGBM model on features extracted from dataset

    :param dataset: dataset to extract features from; dataset.labels is read as well
    :return: result of self.model.predict on the feature dataframe (also stored in self.preds)
    """
    log.debug("getting features...")
    feature_options = dict(
        return_df=True,
        drop_categorical=False,
        return_categorical_list=True,
    )
    frame, categorical_columns = tools.get_features(dataset, **feature_options)

    # the lgb.Dataset is only stored on the instance; the prediction below
    # is made from the dataframe itself
    self.d_test = lgb.Dataset(
        frame,
        label=dataset.labels,
        categorical_feature=categorical_columns,
    )

    log.debug("predicting with model...")
    predictions = self.model.predict(frame)
    self.preds = predictions
    return self.preds
def fit(self, dataset, ROUNDS=100, params=None):
    """
    trains a LightGBM model on features extracted from dataset

    :param dataset: training dataset; features come from tools.get_features, labels from dataset.labels
    :param ROUNDS: number of boosting rounds passed to lgb.train
    :param params: optional dict of LightGBM parameters; its entries override the
        defaults defined below. if omitted, the defaults are used unchanged
    :return: None; the trained model is stored in self.model and the training data in self.d_train
    """
    log.debug("getting features...")
    df_train, categorical = tools.get_features(
        dataset,
        return_df=True,
        drop_categorical=False,
        return_categorical_list=True,
    )
    self.d_train = lgb.Dataset(
        df_train,
        label=dataset.labels,
        categorical_feature=categorical,
    )

    # default hyper-parameters: binary classification with gradient boosted trees
    train_params = {
        'task': 'train',
        'boosting_type': 'gbdt',
        'objective': 'binary',
        'metric': {'binary_logloss'},
        'num_leaves': 96,
        'max_depth': 10,
        'feature_fraction': 0.9,
        'bagging_fraction': 0.95,
        'bagging_freq': 5,
    }
    # caller-supplied values win over the defaults; the caller's dict is not modified
    if params:
        train_params.update(params)

    log.debug("fitting classifier...")
    self.model = lgb.train(train_params, self.d_train, ROUNDS)
def fit(self, dataset):
    """
    fits the wrapped model on features extracted from dataset

    :param dataset: training dataset; features come from tools.get_features, labels from dataset.labels
    :return: None; self.model is fitted in place
    """
    log.debug("getting features...")
    features = tools.get_features(dataset)

    log.debug("fitting classifier...")
    labels = dataset.labels
    self.model.fit(features, labels)