def get_data_metadata(X: DataFrame, y: Series) -> dict:
    """Collect summary metadata about a feature frame and its label series.

    Args:
        X: Feature data. Its shape, null count and feature metadata are
            recorded, both as given and after ``convert_to_raw``.
        y: Label values, used to infer the problem type and (for
            classification) the number of distinct classes.

    Returns:
        A dict with the keys ``num_rows``, ``num_cols``, ``num_null``,
        ``num_classes`` (``None`` unless the problem type is ``'binary'`` or
        ``'multiclass'``), ``problem_type``, ``feature_metadata`` and
        ``feature_metadata_raw``.
    """
    X_raw = convert_to_raw(X)

    feature_metadata_orig = FeatureMetadata.from_df(X)
    feature_metadata_raw = FeatureMetadata.from_df(X_raw)

    num_rows, num_cols = X.shape
    # Total number of null cells across every column of X.
    num_null = X.isnull().sum().sum()

    try:
        problem_type = infer_problem_type(y, silent=True)
    except Exception:
        # TODO: Remove, only here for legacy compatibility
        # NOTE(review): presumably older infer_problem_type versions reject
        # the `silent` keyword -- confirm. `Exception` (rather than a bare
        # `except`) keeps the fallback for ordinary errors while letting
        # KeyboardInterrupt / SystemExit propagate.
        problem_type = infer_problem_type(y)

    # A class count is only meaningful for classification problems.
    # len(unique()) is used deliberately: it counts NaN as its own value.
    if problem_type in ['binary', 'multiclass']:
        num_classes = len(y.unique())
    else:
        num_classes = None

    data_metadata = {
        'num_rows': num_rows,
        'num_cols': num_cols,
        'num_null': num_null,
        'num_classes': num_classes,
        'problem_type': problem_type,
        'feature_metadata': feature_metadata_orig,
        'feature_metadata_raw': feature_metadata_raw,
    }
    # TODO: class imbalance
    # TODO: has_text
    # TODO: has_special
    # TODO: memory size

    return data_metadata
################

label_column = 'class'  # name of the column we want to predict

# Dataset takes a URL (a local CSV file works as well) and returns a Pandas DataFrame.
# Only the first 1000 training rows are kept so the demo runs faster.
train_data = task.Dataset(file_path='https://autogluon.s3.amazonaws.com/datasets/Inc/train.csv').head(1000)
test_data = task.Dataset(file_path='https://autogluon.s3.amazonaws.com/datasets/Inc/test.csv')

#############################################
# Training custom model outside of task.fit #
#############################################

# Split the training frame into the label series and the feature columns.
y_train = train_data[label_column]
X_train = train_data.drop(columns=[label_column])

# The problem type is inferred from the labels here; it could also be given directly.
problem_type = infer_problem_type(y=y_train)

naive_bayes_model = NaiveBayesModel(
    path='AutogluonModels/',
    name='CustomNaiveBayes',
    problem_type=problem_type,
)

# A LabelCleaner neatly converts labels to float/integers for model
# training/inference; inverse_transform maps them back to the originals.
label_cleaner = LabelCleaner.construct(problem_type=problem_type, y=y_train)
y_train_clean = label_cleaner.transform(y_train)

# Fit the custom model on the features and the cleaned labels.
naive_bayes_model.fit(X_train=X_train, y_train=y_train_clean)

# Saving the model to disk and loading it back would look like this:
# load_path = naive_bayes_model.path
# naive_bayes_model.save()
# del naive_bayes_model
# naive_bayes_model = NaiveBayesModel.load(path=load_path)

# Prepare test data