def setUp(self):
    """Load the ``heart_scale`` fixture and set the attributes used by tests.

    The dataset is looked up under ``datasets/heart_scale`` relative to the
    directory containing this file, so the result does not depend on the
    current working directory.
    """
    here = os.path.dirname(os.path.realpath(__file__))
    heart_scale_path = os.path.join(here, 'datasets/heart_scale')

    loaded = import_libsvm_sparse(heart_scale_path)
    features, labels = loaded.format_sklearn()

    self.X = features
    self.y = labels
    # NOTE(review): presumably the query budget used by the tests -- confirm.
    self.quota = 10
def split_train_test(dataset_filepath, test_size, n_labeled):
    """Load a libsvm-format dataset and split it with a fixed seed.

    Args:
        dataset_filepath: Path passed to ``import_libsvm_sparse``.
        test_size: Forwarded to ``train_test_split`` as ``test_size``.
        n_labeled: Number of leading training labels that are kept.

    Returns:
        A 4-tuple ``(trn_ds, tst_ds, y_train, fully_labeled_trn_ds)``:
        the training ``Dataset`` whose labels after the first ``n_labeled``
        are ``None``, the test ``Dataset``, the raw training labels, and a
        training ``Dataset`` carrying every label.
    """
    features, labels = import_libsvm_sparse(dataset_filepath).format_sklearn()

    # The seed is fixed (random_state=42), so every call yields the same split.
    X_train, X_test, y_train, y_test = train_test_split(
        features,
        labels,
        test_size=test_size,
        random_state=42,
        shuffle=True,
    )

    # Keep the first n_labeled labels; pad the rest with None
    # (presumably how Dataset marks unlabeled samples -- confirm).
    known_labels = y_train[:n_labeled]
    hidden_labels = [None] * (len(y_train) - n_labeled)
    partial_labels = np.concatenate([known_labels, hidden_labels])

    fully_labeled_trn_ds = Dataset(X_train, y_train)
    tst_ds = Dataset(X_test, y_test)
    trn_ds = Dataset(X_train, partial_labels)
    return trn_ds, tst_ds, y_train, fully_labeled_trn_ds
def split_train_test(dataset_filepath, test_size, n_labeled):
    """Load a libsvm-format dataset and split it into train and test parts.

    No seed is passed to ``train_test_split``, so the split is not pinned
    by this function.

    Args:
        dataset_filepath: Path passed to ``import_libsvm_sparse``.
        test_size: Forwarded to ``train_test_split`` as ``test_size``.
        n_labeled: Number of leading training labels that are kept.

    Returns:
        ``(trn_ds, tst_ds, y_train, fully_labeled_trn_ds)`` where ``trn_ds``
        has ``None`` in place of every label after the first ``n_labeled``.
    """
    data = import_libsvm_sparse(dataset_filepath)
    X_all, y_all = data.format_sklearn()

    parts = train_test_split(X_all, y_all, test_size=test_size)
    X_train, X_test, y_train, y_test = parts

    # Number of training labels replaced by None
    # (presumably Dataset's marker for "unlabeled" -- confirm).
    n_hidden = len(y_train) - n_labeled
    masked_labels = np.concatenate([y_train[:n_labeled], [None] * n_hidden])

    trn_ds = Dataset(X_train, masked_labels)
    tst_ds = Dataset(X_test, y_test)
    fully_labeled_trn_ds = Dataset(X_train, y_train)

    return trn_ds, tst_ds, y_train, fully_labeled_trn_ds
def split_train_test(dataset_filepath=None, test_size=0.33, n_labeled=10):
    """Load a libsvm-format dataset and split it into train and test parts.

    Called with no arguments this behaves exactly as before: it reads
    ``diabetes.txt`` from the directory containing this file, holds out 33%
    of the samples for testing and keeps the first 10 training labels.

    Args:
        dataset_filepath: Path passed to ``import_libsvm_sparse``. When
            ``None``, ``diabetes.txt`` next to this file is used.
        test_size: Forwarded to ``train_test_split`` as ``test_size``.
        n_labeled: Number of leading training labels that are kept.

    Returns:
        A 4-tuple ``(trn_ds, tst_ds, y_train, fully_labeled_trn_ds)``:
        the training ``Dataset`` whose labels after the first ``n_labeled``
        are ``None``, the test ``Dataset``, the raw training labels, and a
        training ``Dataset`` carrying every label.
    """
    if dataset_filepath is None:
        # Resolve relative to this file so the default does not depend on
        # the current working directory.
        dataset_filepath = os.path.join(
            os.path.dirname(os.path.realpath(__file__)), 'diabetes.txt')

    X, y = import_libsvm_sparse(dataset_filepath).format_sklearn()
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size)

    # Keep the first n_labeled labels; the rest become None
    # (presumably how Dataset marks unlabeled samples -- confirm).
    partial_labels = np.concatenate(
        [y_train[:n_labeled], [None] * (len(y_train) - n_labeled)])

    trn_ds = Dataset(X_train, partial_labels)
    tst_ds = Dataset(X_test, y_test)
    fully_labeled_trn_ds = Dataset(X_train, y_train)
    return trn_ds, tst_ds, y_train, fully_labeled_trn_ds
def split_train_test(dataset_filepath, test_size, n_labeled):
    """Split a dataset so the initially labeled samples cover two classes.

    The data is re-split until the first ``n_labeled`` training labels
    contain exactly two distinct values.

    Args:
        dataset_filepath: Path passed to ``import_libsvm_sparse``.
        test_size: Forwarded to ``train_test_split`` as ``test_size``.
        n_labeled: Number of leading training labels that are kept; must be
            at least 2, otherwise two classes can never be present.

    Returns:
        A 4-tuple ``(trn_ds, tst_ds, y_train, fully_labeled_trn_ds)``:
        the training ``Dataset`` whose labels after the first ``n_labeled``
        are ``None``, the test ``Dataset``, the raw training labels, and a
        training ``Dataset`` carrying every label.

    Raises:
        ValueError: If ``n_labeled`` is smaller than 2, or the dataset has
            fewer than two distinct labels. In both cases the retry loop
            below could never terminate.
    """
    # Fewer than two labeled samples can never contain two classes, so the
    # retry loop would spin forever; fail fast before loading anything.
    if n_labeled < 2:
        raise ValueError(
            "n_labeled must be at least 2, got %r" % (n_labeled,))

    X, y = import_libsvm_sparse(dataset_filepath).format_sklearn()

    # A single-class dataset can never satisfy the loop condition either.
    if len(np.unique(y)) < 2:
        raise ValueError(
            "dataset must contain at least 2 distinct labels: %r"
            % (dataset_filepath,))

    # Re-draw the split until the labeled prefix holds exactly two classes.
    while True:
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=test_size)
        if len(np.unique(y_train[:n_labeled])) == 2:
            break

    # Keep the first n_labeled labels; the rest become None
    # (presumably how Dataset marks unlabeled samples -- confirm).
    partial_labels = np.concatenate(
        [y_train[:n_labeled], [None] * (len(y_train) - n_labeled)])

    trn_ds = Dataset(X_train, partial_labels)
    tst_ds = Dataset(X_test, y_test)
    fully_labeled_trn_ds = Dataset(X_train, y_train)
    return trn_ds, tst_ds, y_train, fully_labeled_trn_ds