def fit_dataset(train_data, model, label, fit_args, sample_size=None):
    """Preprocess ``train_data`` and fit ``model`` on a train/held-out split.

    Steps, in order:

    1. Optionally subsample ``train_data`` to ``sample_size`` rows
       (``random_state=0``) when ``sample_size`` is given and smaller than
       the number of rows.
    2. Split off the ``label`` column, infer the problem type from it and
       build a ``LabelCleaner`` that transforms the labels.
    3. Run the features through a freshly constructed
       ``AutoMLPipelineFeatureGenerator``.
    4. Split features and labels with ``generate_train_test_split``
       (``test_size=0.2``, ``random_state=0``) and fit ``model``, handing
       the second half of the split over as ``X_val`` / ``y_val``.

    Args:
        train_data: Table containing the feature columns and ``label``.
        model: Object exposing ``fit(X, y, X_val, y_val, **kwargs)``.
        label: Name of the target column in ``train_data``.
        fit_args: Mapping of extra keyword arguments forwarded to
            ``model.fit``.
        sample_size: Optional cap on the number of rows used.

    Returns:
        Tuple ``(model, label_cleaner, feature_generator)``.
    """
    needs_subsample = sample_size is not None and sample_size < len(train_data)
    if needs_subsample:
        # Fixed seed keeps the subsample reproducible between runs.
        train_data = train_data.sample(n=sample_size, random_state=0)

    features = train_data.drop(columns=[label])
    target = train_data[label]

    problem_type = infer_problem_type(target)
    label_cleaner = LabelCleaner.construct(problem_type=problem_type, y=target)
    target = label_cleaner.transform(target)

    feature_generator = AutoMLPipelineFeatureGenerator()
    features = feature_generator.fit_transform(features, target)

    X_train, X_val, y_train, y_val = generate_train_test_split(
        features,
        target,
        problem_type=problem_type,
        test_size=0.2,
        random_state=0,
    )

    model.fit(X=X_train, y=y_train, X_val=X_val, y_val=y_val, **fit_args)
    return model, label_cleaner, feature_generator
def _infer_problem_type(y: Series, silent=False):
    """Infer the problem type for the label column ``y``.

    Thin wrapper that forwards ``y`` and ``silent`` to the library-level
    ``infer_problem_type`` and returns its result unchanged.

    Args:
        y: Label values to inspect.
        silent: Forwarded as-is to the library function.

    Returns:
        Whatever the library-level ``infer_problem_type`` returns.

    NOTE(review): assumes the intended delegate is
    ``autogluon.core.utils.infer_problem_type`` -- confirm against this
    file's imports.
    """
    # Bind the library function under an alias inside the function. A
    # module-level wrapper that is also called ``infer_problem_type`` and does
    # not accept ``silent`` would otherwise be the one resolved by the bare
    # name, and the call would fail with a TypeError.
    from autogluon.core.utils import infer_problem_type as _library_infer_problem_type

    return _library_infer_problem_type(y=y, silent=silent)
def infer_problem_type(y: Series):
    """Infer the problem type for the label column ``y``.

    Thin wrapper that forwards ``y`` to the library-level
    ``infer_problem_type`` and returns its result unchanged.

    Args:
        y: Label values to inspect.

    Returns:
        Whatever the library-level ``infer_problem_type`` returns.

    NOTE(review): assumes the intended delegate is
    ``autogluon.core.utils.infer_problem_type`` -- confirm against this
    file's imports.
    """
    # This wrapper has the same name as the function it delegates to. Calling
    # the bare name ``infer_problem_type`` here resolves to this wrapper
    # itself and recurses until RecursionError, so the library function is
    # imported locally under a distinct alias.
    from autogluon.core.utils import infer_problem_type as _library_infer_problem_type

    return _library_infer_problem_type(y=y)
################ train_data = TabularDataset('https://autogluon.s3.amazonaws.com/datasets/Inc/train.csv') # can be local CSV file as well, returns Pandas DataFrame test_data = TabularDataset('https://autogluon.s3.amazonaws.com/datasets/Inc/test.csv') # another Pandas DataFrame label = 'class' # specifies which column do we want to predict train_data = train_data.head(1000) # subsample for faster demo ##################################################### # Training custom model outside of TabularPredictor # ##################################################### # Separate features and labels X = train_data.drop(columns=[label]) y = train_data[label] problem_type = infer_problem_type(y=y) # Infer problem type (or else specify directly) naive_bayes_model = NaiveBayesModel(path='AutogluonModels/', name='CustomNaiveBayes', problem_type=problem_type) # Construct a LabelCleaner to neatly convert labels to float/integers during model training/inference, can also use to inverse_transform back to original. label_cleaner = LabelCleaner.construct(problem_type=problem_type, y=y) y_clean = label_cleaner.transform(y) naive_bayes_model.fit(X=X, y=y_clean) # Fit custom model # To save to disk and load the model, do the following: # load_path = naive_bayes_model.path # naive_bayes_model.save() # del naive_bayes_model # naive_bayes_model = NaiveBayesModel.load(path=load_path) # Prepare test data