def split_trees(trees, tree_labels, n_folds=10, shuffle=True):
    """Return a single stratified train/test partition of ``trees``.

    The labels are first re-encoded with ``np.unique(..., return_inverse=True)``,
    so the label arrays that come back hold integer indices into ``classes_``
    rather than the original label values.  Only the first fold yielded by
    ``StratifiedKFold`` is used; the remaining folds are never generated.

    Parameters
    ----------
    trees
        Samples; must expose ``.shape`` and accept index-array indexing.
    tree_labels
        One label per entry of ``trees``.
    n_folds
        Forwarded to ``StratifiedKFold`` as ``n_folds``.
    shuffle
        When true, samples and labels are permuted together before the
        folds are built; the flag is also forwarded to ``StratifiedKFold``.

    Returns
    -------
    tuple
        ``(train_trees, train_labels, test_trees, test_labels, classes_)``
        where ``classes_`` is the array of unique original labels.
    """
    classes_, encoded = np.unique(tree_labels, return_inverse=True)

    if shuffle:
        # Permute samples and their encoded labels with the same ordering.
        order = np.arange(trees.shape[0])
        random.shuffle(order)
        trees = trees[order]
        encoded = encoded[order]

    folds = StratifiedKFold(encoded, n_folds=n_folds, shuffle=shuffle)
    train_idx, test_idx = next(iter(folds))

    return (
        trees[train_idx],
        encoded[train_idx],
        trees[test_idx],
        encoded[test_idx],
        classes_,
    )
# NOTE(review): the statements down to ``ax.add_artist(ell)`` use names
# (u, v, n, color, gmm, ax) that are not defined anywhere in the visible
# source.  They are presumably the tail of a loop/function body whose header
# lies outside this view -- confirm and restore the enclosing indentation.
angle = np.arctan2(u[1], u[0])
angle = 180 * angle / np.pi  # convert to degrees
v *= 9
ell = mpl.patches.Ellipse(gmm.means_[n, :2], v[0], v[1],
                          180 + angle, color=color)
ell.set_clip_box(ax.bbox)
ell.set_alpha(0.5)
ax.add_artist(ell)

iris = datasets.load_iris()

# Break up the dataset into non-overlapping training (75%) and testing
# (25%) sets.
skf = StratifiedKFold(iris.target, k=4)
# Only take the first fold.  ``next(iter(...))`` is used instead of
# ``.__iter__().next()`` because the ``.next()`` method exists only on
# Python 2 iterators; the builtin works on Python 2.6+ and Python 3.
train_index, test_index = next(iter(skf))

X_train = iris.data[train_index]
y_train = iris.target[train_index]
X_test = iris.data[test_index]
y_test = iris.target[test_index]

n_classes = len(np.unique(y_train))

# Try GMMs using different types of covariances.
classifiers = {
    covariance_type: GMM(n_components=n_classes,
                         covariance_type=covariance_type)
    for covariance_type in ["spherical", "diag", "tied", "full"]
}

n_classifiers = len(classifiers)

pl.figure(figsize=(3 * n_classifiers / 2, 6))
# NOTE(review): the statements down to ``ax.add_artist(ell)`` use names
# (u, v, n, color, gmm, ax) that are not defined anywhere in the visible
# source.  They are presumably the tail of a loop/function body whose header
# lies outside this view -- confirm and restore the enclosing indentation.
#
# arctan2 replaces arctan(u[1] / u[0]): it does not divide by u[0], so a
# vertical axis (u[0] == 0) no longer triggers a divide-by-zero.  The two
# results can differ only by 180 degrees, which leaves the ellipse unchanged.
angle = np.arctan2(u[1], u[0])
angle = 180 * angle / np.pi  # convert to degrees
v *= 9
ell = mpl.patches.Ellipse(gmm.means[n, :2], v[0], v[1],
                          180 + angle, color=color)
ell.set_clip_box(ax.bbox)
ell.set_alpha(0.5)
ax.add_artist(ell)

iris = datasets.load_iris()

# Break up the dataset into non-overlapping training (75%) and testing
# (25%) sets.
skf = StratifiedKFold(iris.target, k=4)
# Only take the first fold.  ``next(iter(...))`` is used instead of
# ``.__iter__().next()`` because the ``.next()`` method exists only on
# Python 2 iterators; the builtin works on Python 2.6+ and Python 3.
train_index, test_index = next(iter(skf))

X_train = iris.data[train_index]
y_train = iris.target[train_index]
X_test = iris.data[test_index]
y_test = iris.target[test_index]

n_classes = len(np.unique(y_train))

# Try GMMs using different types of covariances.
classifiers = {
    cvtype: GMM(n_components=n_classes, cvtype=cvtype)
    for cvtype in ['spherical', 'diag', 'tied', 'full']
}

n_classifiers = len(classifiers)
def split_dataframe(df):
    """Split ``df`` into a train frame and a test frame, stratified by label.

    The values of the ``"OpenStatus"`` column are handed to
    ``StratifiedKFold`` together with ``5`` as its second positional argument
    (presumably the fold count -- confirm against the imported version).
    Only the first ``(train, test)`` index pair it yields is used.

    Parameters
    ----------
    df
        Frame with an ``"OpenStatus"`` column; must support ``.take``.

    Returns
    -------
    tuple
        ``(train_df, test_df)``, each selected positionally with ``df.take``.
    """
    folds = StratifiedKFold(df["OpenStatus"].values, 5)
    # Use the ``next`` builtin rather than ``.__iter__().next()``: the
    # ``.next()`` method exists only on Python 2 iterators, whereas the
    # builtin works on Python 2.6+ and Python 3 alike.
    train, test = next(iter(folds))
    return df.take(train), df.take(test)