Example #1
0
def generate_data(choice='linear'):
    """Generate a random 2-D toy dataset, plot it and return the plot as base64.

    The generated samples are also published through the module-level
    globals ``X``, ``Y`` and ``N`` (sample matrix, labels, sample count).

    Args:
        choice: ``'linear'`` for two blobs built with ``make_blobs`` or
            ``'circle'`` for concentric circles built with ``make_circles``.

    Returns:
        The scatter plot rendered as PNG and encoded as an ASCII base64
        string.

    Raises:
        ValueError: If ``choice`` is not one of the supported names.
    """
    global X, Y, N
    if choice not in ('linear', 'circle'):
        # An unknown choice used to fall through: the first call crashed with
        # a NameError and later calls silently re-plotted stale globals.
        raise ValueError(
            "choice must be 'linear' or 'circle', got {!r}".format(choice))

    if choice == 'linear':
        N = np.random.randint(50, 150)
        # NOTE(review): this value is drawn but never used (cluster_std is
        # fixed at 0.60 below). The draw is kept so the global NumPy random
        # stream is consumed exactly as before -- confirm the intent.
        std = (np.random.randint(6, 10)) * 0.1
        X, Y = make_blobs(n_samples=N,
                          centers=2,
                          random_state=0,
                          cluster_std=0.60)
    else:
        N = np.random.randint(100, 200)
        factor = (np.random.randint(1, 6)) * 0.1
        X, Y = make_circles(n_samples=N, factor=factor, noise=0.1)

    # Drop the current axes so points from an earlier call are not redrawn.
    plt.delaxes()
    plt.scatter(X[:, 0],
                X[:, 1],
                c=Y,
                cmap='winter',
                s=100,
                edgecolors='black')
    plt.xlabel("Feature1")
    plt.ylabel("Feature2")
    plt.title("DATA POINTS")

    # Render into memory and hand the image back as base64 text.
    gendat = BytesIO()
    plt.savefig(gendat, format='png')
    gendat_img = base64.b64encode(gendat.getbuffer()).decode('ascii')
    return gendat_img
Example #2
0
 def circlesexample(self):
     """Scatter a noisy two-circle dataset on ``self.ax`` with a Gaussian height.

     Each point gets a third coordinate ``exp(-(x**2 + y**2))`` and is
     coloured by its class label. ``self.ax`` is presumably a 3-D axes (it is
     given z labels/ticks and a view angle) -- confirm against the class.

     Returns:
         A one-element tuple containing ``self.fig``.
     """
     points, classes = make_circles(90, factor=0.2, noise=0.1)
     # Height of every sample: Gaussian of its squared distance to the origin.
     height = np.exp(-(points**2).sum(1))
     z_ticks = [0.2, 0.4, 0.6, 0.8, 1.0]
     z_tick_text = [r'0.2', r'0.4', r'0.6', r'0.8', r'1.0']

     axes = self.ax
     axes.scatter(points[:, 0],
                  points[:, 1],
                  height,
                  c=classes,
                  s=70,
                  cmap='seismic')
     # Look straight down the z axis.
     axes.view_init(elev=90, azim=90)

     # Font settings shared by the three axis labels.
     label_style = dict(fontproperties=self.prop, fontsize=60)
     axes.set_xlabel('X', color='w', **label_style)
     axes.set_ylabel('Y', color='w', **label_style)
     axes.set_zlabel('Z', labelpad=-1, color='red', **label_style)

     # Tick labels are set first, then the tick positions (order as before).
     axes.set_zticklabels(z_tick_text, fontsize=7, color='none')
     axes.set_zticks(z_ticks)

     # Keep the x/y tick positions but blank out their labels.
     plt.xticks(ticks=np.arange(-1.2, 1.4, .2), labels='')
     plt.yticks(ticks=np.arange(-1.2, 1.4, .2), labels='')
     axes.grid(linewidth=20)
     return (self.fig,)
Example #3
0
    def __init__(self):
        """Load the data selected by ``Configuration`` and split it into X / y.

        Reads ``Configuration.data`` (``"Social_Network_Ads.csv"``, ``"moons"``
        or ``"circles"``) and ``Configuration.algorithm`` and sets
        ``self.start_time``, ``self.dataset`` (CSV only), ``self.X``,
        ``self.y`` and ``self.directory`` accordingly.

        NOTE(review): the regression and k-means branches read
        ``self.dataset``, which is only assigned for the CSV data set; any
        other combination raises ``AttributeError``. Likewise ``self.X`` /
        ``self.y`` / ``self.directory`` stay unset for unrecognised values.
        """
        self.start_time = time.time()

        uses_csv = Configuration.data == "Social_Network_Ads.csv"
        if uses_csv:
            self.dataset = pd.read_csv(str(Configuration.data))

        classifiers = ("logistic_regression", "svc",
                       "decision_tree_classification",
                       "random_forest_classification", "knn")

        if Configuration.algorithm == "linear_regression":
            self.X = self.dataset.iloc[:, :-1].values
            self.y = self.dataset.iloc[:, 1].values
        elif Configuration.algorithm in classifiers:
            if uses_csv:
                self.X = self.dataset.iloc[:, [2, 3]].values
                self.y = self.dataset.iloc[:, 4].values
            elif Configuration.data == "moons":
                # ``sklearn.datasets.samples_generator`` was deprecated in
                # scikit-learn 0.22 and removed in 0.24; the generators are
                # exported from ``sklearn.datasets`` directly.
                from sklearn.datasets import make_moons
                self.X, self.y = make_moons(100, noise=.2, random_state=0)
            elif Configuration.data == "circles":
                from sklearn.datasets import make_circles
                self.X, self.y = make_circles(100, factor=.5, noise=.1,
                                              random_state=0)
        elif Configuration.algorithm == "polynomial_regression":
            self.X = self.dataset.iloc[:, 1:2].values
            self.y = self.dataset.iloc[:, 2].values
        elif Configuration.algorithm == "kmeans":
            # Clustering has no target vector.
            self.X = self.dataset.iloc[:, [3, 4]].values
            self.y = None

        # Name of the directory associated with the chosen data set.
        if uses_csv:
            self.directory = "SocialNetworkAds"
        elif Configuration.data == "moons":
            self.directory = "Moons"
        elif Configuration.data == "circles":
            self.directory = "Circles"
Example #4
0
def ex2():
    """Contrast a linear and an RBF SVC on noisy concentric circles.

    Shows, in order: the raw points, the linear SVC decision function, a 3-D
    view with the Gaussian radial feature ``exp(-(x**2 + y**2))`` as height,
    and the RBF SVC decision function with its support vectors outlined.
    """
    X, y = make_circles(100, factor=.1, noise=.1)

    def scatter_points():
        # Base 2-D scatter reused by every 2-D figure below.
        plt.scatter(X[:, 0], X[:, 1], c=y, s=50, cmap='summer')

    linear_clf = SVC(kernel='linear').fit(X, y)
    scatter_points()
    plt.show()

    scatter_points()
    plot_svc_decision_function(linear_clf)
    plt.show()

    # Radial feature used as the third coordinate.
    radial = np.exp(-(X[:, 0] ** 2 + X[:, 1] ** 2))
    axes3d = plt.subplot(projection='3d')
    axes3d.scatter3D(X[:, 0], X[:, 1], radial, c=y, s=50, cmap='summer')
    axes3d.view_init(elev=30, azim=30)
    axes3d.set_xlabel('x')
    axes3d.set_ylabel('y')
    axes3d.set_zlabel('r')
    plt.show()

    rbf_clf = SVC(kernel='rbf')
    rbf_clf.fit(X, y)

    scatter_points()
    plot_svc_decision_function(rbf_clf)
    support = rbf_clf.support_vectors_
    plt.scatter(support[:, 0], support[:, 1], s=200, facecolors='none')
    plt.show()
Example #5
0
    def make_circles(self, ax=None, plot=False):
        """Create a noisy two-circle dataset and optionally scatter it.

        Args:
            ax: Axes to draw on. When ``None`` a new 12x6 figure is created
                (even if ``plot`` is false).
            plot: When true, draw samples with ``y > 0`` as "Positive" and
                the rest as "Negative", with axis labels and a legend.

        Returns:
            The ``(X, y)`` pair produced by the generator.
        """
        if ax is None:
            _, ax = plt.subplots(1, 1, figsize=(12, 6))

        # Inside the method body the bare name refers to the module-level
        # ``make_circles`` generator, not to this method.
        X, y = make_circles(100, factor=.1, noise=.1)

        if not plot:
            return X, y

        positive = y > 0
        for selector, legend_name in ((positive, "Positive"),
                                      (~positive, "Negative")):
            ax.scatter(X[selector, 0],
                       X[selector, 1],
                       c=self.val_to_color(y[selector]),
                       s=50,
                       label=legend_name)

        ax.set_xlabel("$x_1$")
        ax.set_ylabel("$x_2$")
        ax.legend()
        return X, y
Example #6
0
def main(_):
    """Label held-out circle samples by their nearest training sample.

    Generates ``FLAGS.n_samples`` points, uses the first 70% as the training
    set, and assigns every remaining point the label of the training point
    with the smallest squared Euclidean distance (computed with TensorFlow).
    Plots the training set and the labelled test set, then prints the elapsed
    time and the assigned labels.
    """
    split_at = int(FLAGS.n_samples * 0.7)

    started = time.time()

    data, features = make_circles(n_samples=FLAGS.n_samples, shuffle=True, noise=0.12, factor=0.4)
    tr_data, te_data = data[:split_at], data[split_at:]
    tr_features, te_features = features[:split_at], features[split_at:]
    predicted = []

    _fig, train_ax = plt.subplots()
    train_ax.scatter(tr_data[:, 0], tr_data[:, 1],
                     marker='o', s=100, c=tr_features, cmap=plt.cm.coolwarm)
    plt.plot()
    plt.show()

    with tf.Session() as sess:
        for sample, _label in zip(te_data, te_features):
            # Squared distance from this test sample to every training sample.
            squared = tf.square(tf.subtract(sample, tr_data))
            distances = tf.reduce_sum(squared, axis=1)
            nearest = tf.arg_min(distances, 0)

            predicted.append(tr_features[sess.run(nearest)])

    _fig, test_ax = plt.subplots()
    test_ax.scatter(te_data[:, 0], te_data[:, 1],
                    marker='o', s=100, c=predicted, cmap=plt.cm.coolwarm)
    plt.plot()
    plt.show()

    finished = time.time()
    print("Found in %.2f seconds" % (finished - started))
    print("Cluster assignments:", predicted)
Example #7
0
def kernel_model_rbf():
    """Fit an RBF-kernel SVC (C=1E6) on noisy concentric circles and plot it.

    Draws the labelled points, the decision function (without support
    vectors) and the ``plot_3d`` view, then shows the figures.
    """
    features, labels = make_circles(100, factor=.1, noise=.1)
    model = SVC(kernel='rbf', C=1E6)
    model.fit(features, labels)
    plt.scatter(features[:, 0], features[:, 1], c=labels, s=50, cmap='autumn')
    plot_svc_decision_function(model, plot_support=False)
    plot_3d(features, labels)
    plt.show()
def get_toy_classification_data(n_samples=100, centers=3, n_features=2, type_data="blobs"):
    """Build a toy classification dataset, split it and one-hot encode labels.

    Args:
        n_samples: Number of samples to generate.
        centers: Number of blob centres (used only for ``"blobs"``).
        n_features: Number of features (used only for ``"blobs"``).
        type_data: ``"blobs"``, ``"moons"`` or ``"circles"``.

    Returns:
        ``(X_train, y_train, X_test, y_test, classes)`` where the label
        arrays are passed through ``one_hot_encode`` and ``classes`` holds the
        unique labels found in the training split.

    Raises:
        ValueError: If ``type_data`` is not a supported dataset name.
    """
    if type_data not in ("blobs", "moons", "circles"):
        # Previously this fell through to an UnboundLocalError on ``X``.
        raise ValueError(
            "type_data must be 'blobs', 'moons' or 'circles', got {!r}".format(type_data))

    if type_data == "blobs":
        X, y = make_blobs(n_samples=n_samples, centers=centers, n_features=n_features)
    elif type_data == "moons":
        X, y = make_moons(n_samples=n_samples, noise=0.1)
    else:
        X, y = make_circles(n_samples=n_samples, noise=0.05)

    # 75% / 25% split without stratification.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, stratify=None)

    # The classes are taken from the training labels only.
    classes = np.unique(y_train)

    y_train = one_hot_encode(y_train, classes)
    y_test = one_hot_encode(y_test, classes)

    return X_train, y_train, X_test, y_test, classes
Example #9
0
def _get_circles(*args, **kwargs):
    """Return a 100-sample noisy circles dataset with classification metadata.

    Positional and keyword arguments are accepted but ignored.

    Returns:
        ``(X, y, metadata)`` where ``metadata`` marks the task as
        non-regression, scored with ``classifier_scoring`` and judged
        primarily on accuracy.
    """
    features, targets = make_circles(n_samples=100, noise=0.1)
    metadata = dict(
        regression=False,
        scoring=classifier_scoring,
        primary_metric='accuracy',
    )
    return features, targets, metadata
Example #10
0
    def make_circles(self, plot=False):
        """Create a noisy two-circle dataset and optionally scatter it.

        Args:
            plot: When true, scatter the samples on the current pyplot axes,
                coloured by label, and label both axes.

        Returns:
            The ``(X, y)`` pair produced by the generator.
        """
        # The bare name refers to the module-level generator, not this method.
        samples, labels = make_circles(100, factor=.1, noise=.1)

        if not plot:
            return samples, labels

        plt.scatter(samples[:, 0], samples[:, 1], c=labels, s=50, cmap='autumn')
        plt.xlabel("$x_1$")
        plt.ylabel("$x_2$")
        return samples, labels
Example #11
0
def generate_circle_data(N=100, seed=1):
    """Generate a circles dataset with labels in {-1, 1}, split into train/test.

    Args:
        N: Sample count passed to ``make_circles``.
        seed: Seed applied to NumPy's global random state before generating.

    Returns:
        ``(X_train, X_test, y_train, y_test)`` as plain Python lists, using a
        33% test split with ``random_state=42``.
    """
    np.random.seed(seed)
    points, labels = make_circles(N, factor=.1, noise=.1)
    # Relabel class 0 as -1.
    labels[labels == 0] = -1
    # train_test_split yields X_train, X_test, y_train, y_test in this order.
    parts = train_test_split(points, labels, test_size=0.33, random_state=42)
    return tuple(part.tolist() for part in parts)
Example #12
0
def kernel_model_linear():
    """Fit a linear-kernel SVC on noisy concentric circles and plot it.

    Draws the labelled points, the decision function (without support
    vectors) and the ``plot_3d`` view, then shows the figures.

    :return: None
    """
    features, labels = make_circles(100, factor=.1, noise=.1)
    model = SVC(kernel='linear')
    model.fit(features, labels)
    plt.scatter(features[:, 0], features[:, 1], c=labels, s=50, cmap='autumn')
    plot_svc_decision_function(model, plot_support=False)
    plot_3d(features, labels)
    plt.show()
def spectral_clustering(g_directed):
    """Print the spectral decomposition of a graph, then compare clusterers.

    Part one builds the adjacency matrix ``W`` of ``g_directed``, the diagonal
    matrix ``D`` of its row sums and ``L = D - W``, prints each of them with
    the eigenvalues/eigenvectors of ``L``, and fits a 3-cluster KMeans on one
    eigenvector.

    Part two is independent of the graph: it generates 1000 noisy concentric
    circle samples and scatters them unlabelled, labelled by 2-cluster KMeans
    and labelled by ``SpectralClustering``, all on the current pyplot axes.

    Nothing is returned; the fitted labels are only used for plotting.
    """
    # Earlier experiment with a distance-based affinity matrix, kept disabled:
    # X=np.array.g_directed.edges()
    # W = pairwise_distances(X, metric="euclidean")
    # vectorizer = np.vectorize(lambda x: 1 if x < 5 else 0)
    # W = np.vectorize(vectorizer)(W)
    # print(W)
    W = nx.adjacency_matrix(g_directed)
    print(W.todense())
    # Diagonal matrix holding the row sums of the adjacency matrix.
    D = np.diag(np.sum(np.array(W.todense()), axis=1))
    print('degree matrix:')
    print(D)
    L = D - W
    print('laplacian matrix:')
    print(L)
    e, v = np.linalg.eig(L)
    # eigenvalues
    print('eigenvalues:')
    print(e)
    # eigenvectors
    print('eigenvectors:')
    print(v)
    # Indices of the eigenvalues below 0.5; the column at the second such
    # index is used. NOTE(review): raises IndexError when fewer than two
    # eigenvalues qualify, and np.linalg.eig does not sort its output.
    i = np.where(e < 0.5)[0]
    x = 1  # not used anywhere in this function
    U = np.array(v[:, i[1]])

    km = KMeans(init='k-means++', n_clusters=3)
    km.fit(U)
    km.labels_  # value is discarded
    # From here on the graph is no longer used: synthetic circles demo.
    X, clusters = make_circles(n_samples=1000,
                               noise=.05,
                               factor=.5,
                               random_state=0)
    plt.scatter(X[:, 0], X[:, 1])

    # Using K-means
    km = KMeans(init='k-means++', n_clusters=2)
    km_clustering = km.fit(X)
    plt.scatter(X[:, 0],
                X[:, 1],
                c=km_clustering.labels_,
                cmap='rainbow',
                alpha=0.7,
                edgecolors='b')
    # Using scikit-learn's SpectralClustering implementation
    sc = SpectralClustering(n_clusters=2,
                            affinity='nearest_neighbors',
                            random_state=0)
    sc_clustering = sc.fit(X)
    plt.scatter(X[:, 0],
                X[:, 1],
                c=sc_clustering.labels_,
                cmap='rainbow',
                alpha=0.7,
                edgecolors='b')
Example #14
0
def nonLinearSeparable():
    """Show that an RBF SVC separates two noisy concentric circles.

    Plots the raw labelled samples, then fits ``SVC(kernel='rbf', C=1E6)`` and
    draws the samples again together with ``plot_decision_function``.
    """
    # ``sklearn.datasets.samples_generator`` was deprecated in scikit-learn
    # 0.22 and removed in 0.24; the generator is exported from
    # ``sklearn.datasets`` directly.
    from sklearn.datasets import make_circles

    X, y = make_circles(100, factor=0.1, noise=.1)
    plt.scatter(X[:, 0], X[:, 1], c=y, s=50, cmap='autumn')
    plt.show()

    from sklearn import svm
    clf = svm.SVC(kernel='rbf', C=1E6)
    clf.fit(X, y)
    plt.scatter(X[:, 0], X[:, 1], c=y, s=50, cmap='autumn')
    plot_decision_function(clf)
Example #15
0
    def get_dataset(self):
        """Generate the configured toy dataset.

        Uses ``self.type`` (``'moon'`` or ``'circle'``), ``self.n_samples``
        and ``self.noise``.

        Returns:
            A dict with the samples under ``'data'`` and the labels under
            ``'target'``.

        Raises:
            ValueError: If ``self.type`` is not a supported dataset type.
        """
        if self.type == 'moon':
            datas, labels = make_moons(n_samples=self.n_samples,
                                       noise=self.noise)
        elif self.type == 'circle':
            datas, labels = make_circles(n_samples=self.n_samples,
                                         noise=self.noise)
        else:
            # Previously this only printed a message and then crashed with an
            # UnboundLocalError on ``datas``.
            raise ValueError(
                'wrong dataset type input: {!r}'.format(self.type))

        return {'data': datas, 'target': labels}
def train_svm_plus():
    """Fit an RBF SVC on noisy concentric circles and plot the result.

    Returns:
        The generated ``(X, y)`` samples and labels.
    """
    # 2-D circular data; ``factor`` is the inner/outer circle ratio in (0, 1).
    X, y = make_circles(100, factor=0.1, noise=0.1)

    # Use the radial basis function kernel.
    classifier = SVC(kernel='rbf')
    classifier.fit(X, y)

    plt.scatter(X[:, 0], X[:, 1], c=y, s=50, cmap='autumn')
    plot_SVC_decision_function(classifier, plot_support=False)

    # Outline the support vectors with hollow markers.
    support = classifier.support_vectors_
    plt.scatter(support[:, 0], support[:, 1], s=300, lw=1, facecolors='none')
    return X, y
Example #17
0
def test_mapper_filterer():
    """Fitting with a filterer must match fitting with pre-scaled data.

    One Mapper is fitted on the raw data alone, the other on the raw data
    plus an explicitly MinMax-scaled copy; both must produce equal links and
    the same number of nodes.
    """
    data, labels = make_circles(n_samples=2000, noise=0.03, factor=0.3)
    params = {
        'coverer__intervals': 10,
        'coverer__overlap': 0.1,
        'clusterer__min_samples': 3,
        'clusterer__eps': 0.5,
    }
    scaled_internally = Mapper(filterer=MinMaxScaler(), params=params)
    # NOTE(review): this instance was named "nofilter" but is also built with
    # a filterer -- confirm whether that is intended.
    scaled_externally = Mapper(filterer=MinMaxScaler(), params=params)
    scaled_data = MinMaxScaler().fit_transform(data)

    scaled_internally.fit(data)
    scaled_externally.fit(data, scaled_data)

    assert_true(scaled_internally.links_ == scaled_externally.links_)
    assert_true(len(scaled_internally.nodes_) == len(scaled_externally.nodes_))
Example #18
0
def single_cluster(dim, samples, std, clusters, cluster_func, x=None, y=None):
    """Cluster a dataset, plot truth vs. prediction and print the homogeneity.

    Args:
        dim: Unused by this function.
        samples: Number of samples to generate when no data is supplied.
        std: Unused by this function.
        clusters: Forwarded to ``cluster_func`` as its second argument.
        cluster_func: Callable ``(x, clusters) -> labels``.
        x: Optional samples; generated with ``make_circles`` when ``x`` or
            ``y`` is ``None``.
        y: Optional reference labels.

    The reference grouping is saved to ``or.png`` and the predicted grouping
    to ``grp.png``.
    """
    if x is None or y is None:
        x, y = samples_generator.make_circles(n_samples=samples,
                                              random_state=True,
                                              factor=0.3,
                                              noise=0.05)

    predicted = cluster_func(x, clusters)
    score = sklearn.metrics.homogeneity_score(y, predicted)

    for grouping, target in ((y, 'or.png'), (predicted, 'grp.png')):
        hyp.plot(x, '.', group=grouping, save_path=target)

    print('Accuracy {0:0.2f}'.format(score))
Example #19
0
def test_graph_simple():
    """Smoke test: a fitted Mapper can be exported as JSON and as HTML.

    The JSON export must be loadable with ``json.loads``; the HTML export is
    only required to run without raising.
    """
    data, labels = make_circles(n_samples=2000, noise=0.03, factor=0.3)
    mapper = Mapper(params={
        'coverer__intervals': 10,
        'coverer__overlap': 0.1,
        'clusterer__min_samples': 3,
        'clusterer__eps': 0.5,
    })
    scaled_data = MinMaxScaler().fit_transform(data)
    mapper.fit(data, scaled_data)

    categories = {"labels": labels}
    scales = {"y[0]": scaled_data[:, 0], "y[1]": scaled_data[:, 1]}

    # Loading the string proves the export is well-formed JSON.
    json.loads(json_graph(mapper, categories, scales))
    # The HTML output is generated but not validated.
    html_graph(mapper, categories, scales)
Example #20
0
def choose_dataset(chosen_dataset, n_points):
    """Return the sample matrix of the requested synthetic dataset.

    Args:
        chosen_dataset: ``"blobs"``, ``"moons"``, ``"scatter"`` or
            ``"circle"``.
        n_points: Number of samples to generate.

    Returns:
        The generated samples (labels are dropped), or ``None`` when the
        dataset name is not recognised.
    """
    if chosen_dataset == "blobs":
        samples, _ = make_blobs(n_samples=n_points,
                                centers=4,
                                n_features=2,
                                cluster_std=1.5,
                                random_state=42)
        return samples

    if chosen_dataset == "moons":
        samples, _ = make_moons(n_samples=n_points, noise=0.05, random_state=42)
        return samples

    if chosen_dataset == "scatter":
        samples, _ = make_blobs(n_samples=n_points,
                                cluster_std=[2.5, 2.5, 2.5],
                                random_state=42)
        return samples

    if chosen_dataset == "circle":
        samples, _ = make_circles(n_samples=n_points, noise=0, random_state=42)
        return samples

    return None
Example #21
0
def main():
    """Cluster a noisy two-ring dataset with a Gaussian mixture and score it.

    Prints the silhouette, Calinski-Harabasz and Davies-Bouldin scores of the
    predicted clustering and shows a scatter plot coloured by cluster.
    """
    # Silence all warnings.
    warnings.filterwarnings("ignore")

    # Alternative datasets, kept for reference.
    # Blob-shaped data:
    # x, y = samples_generator.make_blobs(n_samples=200, centers=2, cluster_std=1, random_state=0)
    # Moon-shaped data:
    # x, y = samples_generator.make_moons(n_samples=200, noise=0.05, random_state=0)
    # Ring-shaped data (the one in use):
    points, _truth = samples_generator.make_circles(n_samples=200, noise=0.05, random_state=0, factor=0.4)

    # Seven candidate clustering algorithms; only the last one is active.
    # model = cluster.KMeans(2)  # K-means++
    # model = cluster.MeanShift()  # mean shift
    # model = cluster.AgglomerativeClustering(2)  # hierarchical clustering
    # model = cluster.AffinityPropagation()  # affinity propagation
    # model = cluster.SpectralClustering(n_clusters=2, affinity="nearest_neighbors")  # spectral clustering
    # model = cluster.DBSCAN(eps=0.55, min_samples=5)  # density-based clustering
    model = GaussianMixture(n_components=2)  # Gaussian mixture

    # Fit the model and get one cluster index per sample.
    assignments = model.fit_predict(points)

    # Three evaluation measures.
    # 1. Silhouette coefficient
    print(metrics.silhouette_score(points, assignments))
    # 2. Calinski-Harabasz index
    print(metrics.calinski_harabasz_score(points, assignments))
    # 3. Davies-Bouldin index
    print(metrics.davies_bouldin_score(points, assignments))

    # Plot the samples coloured by assigned cluster.
    plt.scatter(points[:, 0], points[:, 1], c=assignments, cmap='viridis')
    plt.show()
Example #22
0
File: DBSCAN.py Project: nanmi/XM
import numpy as np
import matplotlib.pyplot as plt
# ``sklearn.datasets.samples_generator`` was deprecated in scikit-learn 0.22
# and removed in 0.24; the generators are exported from ``sklearn.datasets``.
from sklearn.datasets import make_circles, make_blobs

from sklearn.cluster import DBSCAN

# ``calinski_harabaz_score`` was a misspelled alias that was removed in
# scikit-learn 0.23; ``calinski_harabasz_score`` is the supported name.
from sklearn.metrics import calinski_harabasz_score

# Create cluster dataset: two noisy rings plus one tight blob at (1.2, 1.2).
X1, y1 = make_circles(n_samples=5000, factor=0.6, noise=0.05, random_state=666)
X2, y2 = make_blobs(n_samples=1000, n_features=2, centers=[[1.2, 1.2]],
                    cluster_std=[[0.1]], random_state=666)

X = np.concatenate((X1, X2))
y = np.concatenate((y1, y2))

# Create cluster model
dbscan = DBSCAN(eps=0.1, min_samples=5)

y_predict = dbscan.fit_predict(X)

print('Calinski-Harabasz Index Score: ', calinski_harabasz_score(X, y_predict))

# Visualization: colour every sample by its predicted cluster.
plt.scatter(X[:, 0], X[:, 1], c=y_predict, marker='o', edgecolors='black')

plt.xlabel('X')
plt.ylabel('y')
plt.title('DBSCAN Cluster Algorithm')

plt.show()
Example #23
0
import time

import matplotlib
import matplotlib.pyplot as plt

# ``sklearn.datasets.samples_generator`` was deprecated in scikit-learn 0.22
# and removed in 0.24; the generator is exported from ``sklearn.datasets``.
from sklearn.datasets import make_circles

N = 210
K = 2
MAX_ITERS = 1000
# The first 70% of the samples form the training set.
cut = int(N * 0.7)

start = time.time()

data, features = make_circles(n_samples=N,
                              shuffle=True,
                              noise=0.12,
                              factor=0.4)
# print(data.shape)
# print(data)
# print(features.shape)

tr_data, tr_features = data[:cut], features[:cut]
te_data, te_features = data[cut:], features[cut:]
test = []

fig, ax = plt.subplots()
ax.scatter(tr_data.transpose()[0],
           tr_data.transpose()[1],
           marker='o',
           s=100,
           c=tr_features,
Example #24
0
# ``time`` and ``np`` are used below but were not imported by this snippet.
import time

import matplotlib
import matplotlib.pyplot as plt
import numpy as np

# ``sklearn.datasets.samples_generator`` was deprecated in scikit-learn 0.22
# and removed in 0.24; the generators are exported from ``sklearn.datasets``.
from sklearn.datasets import make_blobs
from sklearn.datasets import make_circles

DATA_TYPE = 'blobs'

# Number of clusters, if we choose circles, only 2 will be enough
if DATA_TYPE == 'circle':
    K = 2
else:
    K = 4

# Maximum number of iterations, if the conditions are not met
MAX_ITERS = 1000
N = 200
start = time.time()
centers = [(-2, -2), (-2, 1.5), (1.5, -2), (2, 1.5)]

# Generate N samples (the count used to be hard-coded as 200 next to N = 200).
if DATA_TYPE == 'circle':
    data, features = make_circles(n_samples=N, shuffle=True, noise=0.01, factor=0.4)
else:
    data, features = make_blobs(n_samples=N, centers=centers, n_features=2,
                                cluster_std=0.8, shuffle=False, random_state=42)

# Only the blob centres are plotted here, not the generated samples.
fig, ax = plt.subplots()
center_coords = np.asarray(centers).transpose()
ax.scatter(center_coords[0], center_coords[1], marker='o', s=250)
plt.show()

print("TEST")
Example #25
0
def main(selectedDataset="digits", pop_size=100, max_generations=100):

    # a few hard-coded values
    figsize = [5, 4]
    seed = 42
    #	pop_size = 300
    offspring_size = 2 * pop_size
    #	max_generations = 300
    maximize = False
    #	selectedDataset = "circles"
    selectedClassifiers = ["SVC"]

    # a list of classifiers
    allClassifiers = [
        [RandomForestClassifier, "RandomForestClassifier", 1],
        [BaggingClassifier, "BaggingClassifier", 1],
        [SVC, "SVC", 1],
        [RidgeClassifier, "RidgeClassifier", 1],
        #			[AdaBoostClassifier, "AdaBoostClassifier", 1],
        #			[ExtraTreesClassifier, "ExtraTreesClassifier", 1],
        #			[GradientBoostingClassifier, "GradientBoostingClassifier", 1],
        #			[SGDClassifier, "SGDClassifier", 1],
        #			[PassiveAggressiveClassifier, "PassiveAggressiveClassifier", 1],
        #			[LogisticRegression, "LogisticRegression", 1],
    ]

    selectedClassifiers = [classifier[1] for classifier in allClassifiers]

    folder_name = datetime.datetime.now().strftime(
        "%Y-%m-%d-%H-%M") + "-archetypes-" + selectedDataset + "-" + str(
            pop_size)
    if not os.path.exists(folder_name):
        os.makedirs(folder_name)
    else:
        sys.stderr.write("Error: folder \"" + folder_name +
                         "\" already exists. Aborting...\n")
        sys.exit(0)
    # open the logging file
    logfilename = os.path.join(folder_name, 'logfile.log')
    logger = setup_logger('logfile_' + folder_name, logfilename)
    logger.info("All results will be saved in folder \"%s\"" % folder_name)

    # load different datasets, prepare them for use
    logger.info("Preparing data...")
    # synthetic databases
    centers = [[1, 1], [-1, -1], [1, -1]]
    blobs_X, blobs_y = make_blobs(n_samples=400,
                                  centers=centers,
                                  n_features=2,
                                  cluster_std=0.6,
                                  random_state=seed)
    circles_X, circles_y = make_circles(n_samples=400,
                                        noise=0.15,
                                        factor=0.4,
                                        random_state=seed)
    moons_X, moons_y = make_moons(n_samples=400, noise=0.2, random_state=seed)
    iris = datasets.load_iris()
    digits = datasets.load_digits()
    #	forest_X, forest_y = loadForestCoverageType() # local function
    mnist_X, mnist_y = loadMNIST()  # local function

    dataList = [
        [blobs_X, blobs_y, 0, "blobs"],
        [circles_X, circles_y, 0, "circles"],
        [moons_X, moons_y, 0, "moons"],
        [iris.data, iris.target, 0, "iris4"],
        [iris.data[:, 2:4], iris.target, 0, "iris2"],
        [digits.data, digits.target, 0, "digits"],
        #			[forest_X, forest_y, 0, "covtype"],
        [mnist_X, mnist_y, 0, "mnist"]
    ]

    # argparse; all arguments are optional
    parser = argparse.ArgumentParser()

    parser.add_argument(
        "--classifiers",
        "-c",
        nargs='+',
        help="Classifier(s) to be tested. Default: %s. Accepted values: %s" %
        (selectedClassifiers[0], [x[1] for x in allClassifiers]))
    parser.add_argument(
        "--dataset",
        "-d",
        help="Dataset to be tested. Default: %s. Accepted values: %s" %
        (selectedDataset, [x[3] for x in dataList]))

    parser.add_argument("--pop_size",
                        "-p",
                        type=int,
                        help="EA population size. Default: %d" % pop_size)
    parser.add_argument("--offspring_size",
                        "-o",
                        type=int,
                        help="Ea offspring size. Default: %d" % offspring_size)
    parser.add_argument("--max_generations",
                        "-mg",
                        type=int,
                        help="Maximum number of generations. Default: %d" %
                        max_generations)

    # finally, parse the arguments
    args = parser.parse_args()

    # a few checks on the (optional) inputs
    if args.dataset:
        selectedDataset = args.dataset
        if selectedDataset not in [x[3] for x in dataList]:
            logger.info(
                "Error: dataset \"%s\" is not an accepted value. Accepted values: %s"
                % (selectedDataset, [x[3] for x in dataList]))
            sys.exit(0)

    if args.classifiers != None and len(args.classifiers) > 0:
        selectedClassifiers = args.classifiers
        for c in selectedClassifiers:
            if c not in [x[1] for x in allClassifiers]:
                logger.info(
                    "Error: classifier \"%s\" is not an accepted value. Accepted values: %s"
                    % (c, [x[1] for x in allClassifiers]))
                sys.exit(0)

    if args.max_generations: max_generations = args.max_generations
    if args.pop_size: pop_size = args.pop_size
    if args.offspring_size: offspring_size = args.offspring_size

    # TODO: check that min_points < max_points and max_generations > 0

    # print out the current settings
    logger.info("Settings of the experiment...")
    logger.info("Fixed random seed: %d" % (seed))
    logger.info("Selected dataset: %s; Selected classifier(s): %s" %
                (selectedDataset, selectedClassifiers))
    logger.info(
        "Population size in EA: %d; Offspring size: %d; Max generations: %d" %
        (pop_size, offspring_size, max_generations))

    # create the list of classifiers
    classifierList = [x for x in allClassifiers if x[1] in selectedClassifiers]

    # pick the dataset
    db_index = -1
    for i in range(0, len(dataList)):
        if dataList[i][3] == selectedDataset:
            db_index = i

    dbname = dataList[db_index][3]

    X, y = dataList[db_index][0], dataList[db_index][1]
    number_classes = np.unique(y).shape[0]

    logger.info("Creating train/test split...")
    from sklearn.model_selection import StratifiedKFold
    skf = StratifiedKFold(n_splits=3, shuffle=True, random_state=seed)
    listOfSplits = [split for split in skf.split(X, y)]
    trainval_index, test_index = listOfSplits[0]
    X_trainval, y_trainval = X[trainval_index], y[trainval_index]
    X_test, y_test = X[test_index], y[test_index]
    skf = StratifiedKFold(n_splits=3, shuffle=False, random_state=seed)
    listOfSplits = [split for split in skf.split(X_trainval, y_trainval)]
    train_index, val_index = listOfSplits[0]
    X_train, y_train = X_trainval[train_index], y_trainval[train_index]
    X_val, y_val = X_trainval[val_index], y_trainval[val_index]
    logger.info(
        "Training set: %d lines (%.2f%%); test set: %d lines (%.2f%%)" %
        (X_train.shape[0],
         (100.0 * float(X_train.shape[0] / X.shape[0])), X_test.shape[0],
         (100.0 * float(X_test.shape[0] / X.shape[0]))))

    # rescale data
    scaler = StandardScaler()
    sc = scaler.fit(X_train)
    X = sc.transform(X)
    X_trainval = sc.transform(X_trainval)
    X_train = sc.transform(X_train)
    X_val = sc.transform(X_val)
    X_test = sc.transform(X_test)

    for classifier in classifierList:

        classifier_name = classifier[1]

        # start creating folder name
        experiment_name = os.path.join(
            folder_name,
            datetime.datetime.now().strftime("%Y-%m-%d-%H-%M") +
            "-archetypes-evolution-" + dbname + "-" + classifier_name)
        if not os.path.exists(experiment_name): os.makedirs(experiment_name)

        logger.info("Classifier used: " + classifier_name)

        start = time.time()
        solutions, trainAccuracy, testAccuracy = evolveArchetypes(
            X,
            y,
            X_train,
            y_train,
            X_test,
            y_test,
            classifier,
            pop_size,
            offspring_size,
            max_generations,
            number_classes=number_classes,
            maximize=maximize,
            seed=seed,
            experiment_name=experiment_name)
        end = time.time()
        exec_time = end - start

        # only candidates with all classes are considered
        final_archive = []
        for sol in solutions:
            c = sol.candidate
            c = np.array(c)
            y_core = c[:, -1]
            if len(set(y_core)) == number_classes:
                final_archive.append(sol)

        logger.info("Now saving final Pareto front in a figure...")
        pareto_front_x = [f.fitness[0] for f in final_archive]
        pareto_front_y = [f.fitness[1] for f in final_archive]

        figure = plt.figure(figsize=figsize)
        ax = figure.add_subplot(111)
        ax.plot(pareto_front_x,
                pareto_front_y,
                "bo-",
                label="Solutions in final archive")
        ax.set_title("Optimal solutions")
        ax.set_xlabel("Archetype set size")
        ax.set_ylabel("Error")
        ax.set_xlim([1, X_train.shape[0]])
        ax.set_ylim([0, 0.4])
        plt.tight_layout()
        plt.savefig(
            os.path.join(
                experiment_name,
                "%s_EvoArch_%s_pareto.png" % (dbname, classifier_name)))
        plt.savefig(
            os.path.join(
                experiment_name,
                "%s_EvoArch_%s_pareto.pdf" % (dbname, classifier_name)))
        plt.close(figure)

        figure = plt.figure(figsize=figsize)
        ax = figure.add_subplot(111)
        ax.plot(pareto_front_x,
                pareto_front_y,
                "bo-",
                label="Solutions in final archive")
        ax.set_title("Optimal solutions")
        ax.set_xlabel("Archetype set size")
        ax.set_ylabel("Error")
        plt.tight_layout()
        plt.savefig(
            os.path.join(
                experiment_name,
                "%s_EvoArch_%s_pareto_zoom.png" % (dbname, classifier_name)))
        plt.savefig(
            os.path.join(
                experiment_name,
                "%s_EvoArch_%s_pareto_zoom.pdf" % (dbname, classifier_name)))
        plt.close(figure)

        # initial performance
        X_err, testAccuracy, model, fail_points, y_pred = evaluate_core(
            X_trainval,
            y_trainval,
            X_test,
            y_test,
            classifier[0],
            cname=classifier_name,
            SEED=seed)
        X_err, trainAccuracy, model, fail_points, y_pred = evaluate_core(
            X_trainval,
            y_trainval,
            X_trainval,
            y_trainval,
            classifier[0],
            cname=classifier_name,
            SEED=seed)
        logger.info("Compute performances!")
        logger.info("Elapsed time (seconds): %.4f" % (exec_time))
        logger.info("Initial performance: train=%.4f, test=%.4f, size: %d" %
                    (trainAccuracy, testAccuracy, X_train.shape[0]))

        # best solution
        accuracy = []
        for sol in final_archive:
            c = sol.candidate
            c = np.array(c)
            X_core = c[:, :-1]
            y_core = c[:, -1]
            X_err, accuracy_val, model, fail_points, y_pred = evaluate_core(
                X_core,
                y_core,
                X_val,
                y_val,
                classifier[0],
                cname=classifier_name,
                SEED=seed)
            X_err, accuracy_train, model, fail_points, y_pred = evaluate_core(
                X_core,
                y_core,
                X_train,
                y_train,
                classifier[0],
                cname=classifier_name,
                SEED=seed)
            accuracy.append(np.mean([accuracy_val, accuracy_train]))

        best_ids = np.array(np.argsort(accuracy)).astype('int')[::-1]
        count = 0
        for i in best_ids:

            if count > 2:
                break

            c = final_archive[i].candidate
            c = np.array(c)

            X_core = c[:, :-1]
            y_core = c[:, -1]

            X_err, accuracy_train, model, fail_points, y_pred = evaluate_core(
                X_core,
                y_core,
                X_train,
                y_train,
                classifier[0],
                cname=classifier_name,
                SEED=seed)
            X_err, accuracy_val, model, fail_points, y_pred = evaluate_core(
                X_core,
                y_core,
                X_val,
                y_val,
                classifier[0],
                cname=classifier_name,
                SEED=seed)
            X_err, accuracy, model, fail_points, y_pred = evaluate_core(
                X_core,
                y_core,
                X_test,
                y_test,
                classifier[0],
                cname=classifier_name,
                SEED=seed)
            logger.info(
                "Minimal train/val error: train: %.4f, val: %.4f; test: %.4f, size: %d"
                % (accuracy_train, accuracy_val, accuracy, X_core.shape[0]))

            if False:  #(dbname == "mnist" or dbname == "digits") and count == 0:

                if dbname == "mnist":
                    H, W = 28, 28
                if dbname == "digits":
                    H, W = 8, 8

                logger.info("Now saving figures...")

                # save archetypes
                for index in range(0, len(y_core)):
                    image = np.reshape(X_core[index, :], (H, W))
                    plt.figure()
                    plt.axis('off')
                    plt.imshow(image, cmap=plt.cm.gray_r)
                    plt.title('Label: %d' % (y_core[index]))
                    plt.tight_layout()
                    plt.savefig(
                        os.path.join(
                            experiment_name,
                            "digit_%d_idx_%d.pdf" % (y_core[index], index)))
                    plt.savefig(
                        os.path.join(
                            experiment_name,
                            "digit_%d_idx_%d.png" % (y_core[index], index)))
                    plt.close()

                # save test errors
                e = 1
                for index in range(0, len(y_test)):
                    if fail_points[index] == True:
                        image = np.reshape(X_test[index, :], (H, W))
                        plt.figure()
                        plt.axis('off')
                        plt.imshow(image, cmap=plt.cm.gray_r)
                        plt.title('Label: %d - Prediction: %d' %
                                  (y_test[index], y_pred[index]))
                        plt.savefig(
                            os.path.join(
                                experiment_name,
                                "err_lab_%d_pred_%d_idx_%d.pdf" %
                                (y_test[index], y_pred[index], e)))
                        plt.savefig(
                            os.path.join(
                                experiment_name,
                                "err_lab_%d_pred_%d_idx_%d.png" %
                                (y_test[index], y_pred[index], e)))
                        plt.close()
                        e = e + 1

            # plot decision boundaries if we have only 2 dimensions!
            if X.shape[1] == 2:

                cmap = ListedColormap(sns.color_palette("bright", 3).as_hex())
                xx, yy = make_meshgrid(X[:, 0], X[:, 1])
                figure = plt.figure(figsize=figsize)
                _, Z_0 = plot_contours(model, xx, yy, colors='k', alpha=0.2)
                #			plt.scatter(X_train[:, 0], X_train[:, 1], c=y_train, cmap=cmap, marker='s', alpha=0.4, label="train")
                plt.scatter(X_test[:, 0],
                            X_test[:, 1],
                            c=y_test,
                            cmap=cmap,
                            marker='+',
                            alpha=0.3,
                            label="test")
                plt.scatter(X_core[:, 0],
                            X_core[:, 1],
                            c=y_core,
                            cmap=cmap,
                            marker='D',
                            facecolors='none',
                            edgecolors='none',
                            alpha=1,
                            label="archetypes")
                plt.scatter(X_err[:, 0],
                            X_err[:, 1],
                            marker='x',
                            facecolors='k',
                            edgecolors='k',
                            alpha=1,
                            label="errors")
                plt.legend()
                plt.title("%s - acc. %.4f" % (classifier_name, accuracy))
                plt.tight_layout()
                plt.savefig(
                    os.path.join(
                        experiment_name, "%s_EvoArch_%s_%d.png" %
                        (dbname, classifier_name, count)))
                plt.savefig(
                    os.path.join(
                        experiment_name, "%s_EvoArch_%s_%d.pdf" %
                        (dbname, classifier_name, count)))
                plt.close(figure)

                if count == 0:
                    # using all samples in the training set
                    X_err, accuracy, model, fail_points, y_pred = evaluate_core(
                        X_trainval,
                        y_trainval,
                        X_test,
                        y_test,
                        classifier[0],
                        cname=classifier_name,
                        SEED=seed)
                    X_err_train, trainAccuracy, model_train, fail_points_train, y_pred_train = evaluate_core(
                        X_trainval,
                        y_trainval,
                        X_trainval,
                        y_trainval,
                        classifier[0],
                        cname=classifier_name,
                        SEED=seed)

                    figure = plt.figure(figsize=figsize)
                    _, Z_0 = plot_contours(model,
                                           xx,
                                           yy,
                                           colors='k',
                                           alpha=0.2)
                    plt.scatter(X_trainval[:, 0],
                                X_trainval[:, 1],
                                c=y_trainval,
                                cmap=cmap,
                                marker='s',
                                alpha=0.4,
                                label="train")
                    plt.scatter(X_test[:, 0],
                                X_test[:, 1],
                                c=y_test,
                                cmap=cmap,
                                marker='+',
                                alpha=0.4,
                                label="test")
                    plt.scatter(X_err[:, 0],
                                X_err[:, 1],
                                marker='x',
                                facecolors='k',
                                edgecolors='k',
                                alpha=1,
                                label="errors")
                    plt.legend()
                    plt.title("%s - acc. %.4f" % (classifier_name, accuracy))
                    plt.tight_layout()
                    plt.savefig(
                        os.path.join(
                            experiment_name, "%s_EvoArch_%s_alltrain.png" %
                            (dbname, classifier_name)))
                    plt.savefig(
                        os.path.join(
                            experiment_name, "%s_EvoArch_%s_alltrain.pdf" %
                            (dbname, classifier_name)))
                    plt.close(figure)

            count = count + 1

    logger.handlers.pop()

    return
Example #26
0
            if q in oldCoreObjs.keys():
                delte = [val for val in oldCoreObjs[q]
                         if val in notAccess]  #Δ = N(q)∩Γ
                queue.extend(delte)  #将Δ中的样本加入队列Q
                notAccess = [val for val in notAccess
                             if val not in delte]  #Γ = Γ\Δ
        k += 1
        C[k] = [val for val in OldNotAccess if val not in notAccess]
        for x in C[k]:
            if x in coreObjs.keys():
                del coreObjs[x]
    return C


if __name__ == '__main__':
    # Draw 1000 noisy samples lying on two concentric circles.
    X, y_true = make_circles(n_samples=1000, noise=0.15)
    print(X)
    print(y_true)

    # Show the raw data, colored by the circle each sample was drawn from.
    first_feature, second_feature = X[:, 0], X[:, 1]
    plt.scatter(first_feature, second_feature, c=y_true)
    plt.show()

    # Run DBSCAN and time the construction plus fit_predict call.
    # eps is the neighborhood distance threshold (epsilon), min_samples is
    # the neighborhood sample-count threshold (MinPts), X is the data.
    started = time.time()
    y_pred = DBSCAN(eps=.1, min_samples=6).fit_predict(X)
    elapsed = time.time() - started

    # Same scatter as above, now colored by the predicted cluster label.
    plt.scatter(first_feature, second_feature, c=y_pred)
    plt.title('time : %f' % elapsed)
    plt.show()
Example #27
0
    ax = ax or plt.gca()
    ax.scatter(X[:, 0], X[:, 1], c=y, s=50, cmap='autumn')
    ax.set_xlim(-1, 4)
    ax.set_ylim(-1, 6)
    plot_svc_decision_function(model, ax)


# Fit and draw the SVM for two training-set sizes, side by side.
fig, ax = plt.subplots(1, 2, figsize=(16, 6))
fig.subplots_adjust(left=0.0625, right=0.95, wspace=0.1)
for axi, N in zip(ax, [60, 120]):
    plot_svm(N, axi)
    axi.set_title('N = {0}'.format(N))

# SVM with kernel functions.
# make_circles is imported from sklearn.datasets: the former
# sklearn.datasets.samples_generator module was removed in scikit-learn 0.24.
from sklearn.datasets import make_circles
X, y = make_circles(100, factor=.1, noise=.1)

# A linear kernel is fitted to the concentric-circle data.
clf = SVC(kernel='linear').fit(X, y)

plt.scatter(X[:, 0], X[:, 1], c=y, s=50, cmap='autumn')
plot_svc_decision_function(clf, plot_support=False)
plt.show()

# Add a new dimension r: a radial basis function of each sample's squared
# distance from the origin.
from mpl_toolkits import mplot3d
r = np.exp(-(X**2).sum(1))


def plot_3D(elev=30, azim=30, X=X, y=y):
    ax = plt.subplot(projection='3d')
    ax.scatter3D(X[:, 0], X[:, 1], r, c=y, s=50, cmap='autumn')
Example #28
0
    ylim = ax.get_ylim()
    xx = np.linspace(xlim[0], xlim[1], 200)
    yy = np.linspace(ylim[0], ylim[1], 200)
    YY, XX = np.meshgrid(yy, xx)
    xy = np.vstack([XX.ravel(), YY.ravel()]).T

    Z = model.decision_function(xy).reshape(XX.shape)

    # plot decision boundary and margins
    ax.contour(XX, YY, Z, colors='k', levels=[-1, 0, 1], alpha=0.5,
               linestyles=['--', '-', '--'])
    # plot support vectors
    ax.scatter(model.support_vectors_[:, 0], model.support_vectors_[:, 1], s=300,
               linewidth=1, facecolors='none', edgecolors='k')
    plt.show()

    y1_model = model.predict(X_train)
    y2_model = model.predict(X_test)

    print('Accuracy on train data:',accuracy_score(Y_train, y1_model))
    print('Accuracy on test data:',accuracy_score(Y_test, y2_model))

# Two noisy concentric circles, with 30% of the samples held out for testing.
X, Y = make_circles(n_samples=200, noise=0.05, factor=0.5)
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3)

# Linear-kernel SVM trained on the training split; fit() returns the
# estimator, so construction and training are chained.
svclassifier = svm.SVC(kernel='linear', C=1E3).fit(X_train, Y_train)
plot_model(svclassifier)

# Report accuracy on the held-out split, then on the training split.
test_predictions = svclassifier.predict(X_test)
print('accuracy test = ', accuracy_score(Y_test, test_predictions))
train_predictions = svclassifier.predict(X_train)
print('accuracy train = ', accuracy_score(Y_train, train_predictions))
Example #29
0
            P[i, j] = clf.decision_function([xi, yj])
    # plot the margins
    ax.contour(X, Y, P, colors='k',
               levels=[-1, 0, 1], alpha=0.5,
               linestyles=['--', '-', '--'])
# Second panel: data, decision function, and the support vectors drawn as
# larger markers with no fill.
plt.subplot(412)
plt.scatter(X[:, 0], X[:, 1], c=y, s=50, cmap='spring')
plot_svc_decision_function(clf)
plt.scatter(clf.support_vectors_[:, 0], clf.support_vectors_[:, 1],
            s=200, facecolors='none')

"""
circles
"""
# make_circles is imported from sklearn.datasets: the former
# sklearn.datasets.samples_generator module was removed in scikit-learn 0.24.
from sklearn.datasets import make_circles
X, y = make_circles(100, factor=.1, noise=.1)

# Third panel: a linear kernel fitted to the concentric-circle data.
clf = SVC(kernel='linear').fit(X, y)
plt.subplot(413)
plt.scatter(X[:, 0], X[:, 1], c=y, s=50, cmap='spring')
plot_svc_decision_function(clf)

# Fourth panel: the same data fitted with an RBF kernel, support vectors
# overlaid.
clf = SVC(kernel='rbf')
clf.fit(X, y)
plt.subplot(414)
plt.scatter(X[:, 0], X[:, 1], c=y, s=50, cmap='spring')
plot_svc_decision_function(clf)
plt.scatter(clf.support_vectors_[:, 0], clf.support_vectors_[:, 1],
            s=200, facecolors='none')
plt.show()
# -*- coding: utf-8 -*-
"""Demo108_PCA_Circles.ipynb

# **Tame Your Python**
"""

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
print(tf.__version__)

# make_circles is imported from sklearn.datasets: the former
# sklearn.datasets.samples_generator module was removed in scikit-learn 0.24.
from sklearn.datasets import make_circles
n = 100
# generate 2d classification dataset
X, y = make_circles(n_samples=n)
# scatter plot, dots colored by class value
df = pd.DataFrame(dict(x=X[:,0], y=X[:,1], label=y))
colors = {0:'red', 1:'blue'}
fig, ax = plt.subplots()
grouped = df.groupby('label')
# One scatter series per class label, all drawn on the same axes.
for key, group in grouped:
    group.plot(ax=ax, kind='scatter', x='x', y='y', label=key, color=colors[key])
plt.show()

# Repack the two features and the target into a single DataFrame.
datadict = {'X1': X[:,0],'X2' : X[:,1], 'target': y}
data = pd.DataFrame(data=datadict)

# Keep only the two feature columns, as a NumPy array.
X = data.iloc[:,[0, 1]].values
type(X)
Example #31
0
import numpy as np
import matplotlib.pyplot as plt
# make_circles is imported from sklearn.datasets: the former
# sklearn.datasets.samples_generator module was removed in scikit-learn 0.24.
from sklearn.datasets import make_circles
from sklearn.svm import SVC
from DecisionBoundary import plot_svm_margin

# Creating a toy data with circles
X, y = make_circles(100, factor=.1, noise=.1, random_state=88)

# plotting the data
plt.figure(figsize=[6, 6])
plt.scatter(X[:, 0], X[:, 1], c=y, s=50, cmap=plt.cm.coolwarm)
plt.show()

# calculating the radius (Euclidean distance of each sample from the origin)
r = np.sum(X**2, axis=1)**0.5

# plotting the data, y-axis replaced with the radius
plt.figure(figsize=[6, 6])
plt.scatter(X[:, 0], r, c=y, s=50, cmap=plt.cm.coolwarm)
plt.show()

# SVM trained on the transformed features (first coordinate, radius)
R = np.vstack([X[:, 0], r]).T
sv = SVC(kernel='linear', C=10000)
sv.fit(R, y)

# plotting the margin on the SVM of the transformed data
plt.figure(figsize=[6, 6])
plot_svm_margin(R, y, sv)
plt.show()
Example #32
0
import time

import matplotlib
import matplotlib.pyplot as plt

# make_circles is imported from sklearn.datasets: the former
# sklearn.datasets.samples_generator module was removed in scikit-learn 0.24.
from sklearn.datasets import make_circles

N=210
K=2
# Maximum number of iterations, if the conditions are not met
MAX_ITERS = 1000
# Index separating the first 70% of the samples (training) from the rest (test)
cut=int(N*0.7)

start = time.time()

# make_circles returns (samples, labels); the labels are stored under the
# name `features` here.
data, features = make_circles(n_samples=N, shuffle=True, noise= 0.12, factor=0.4)
tr_data, tr_features= data[:cut], features[:cut]
te_data,te_features=data[cut:], features[cut:]

# Scatter the training samples, colored by their label.
fig, ax = plt.subplots()
ax.scatter(tr_data.transpose()[0], tr_data.transpose()[1], marker = 'o', s = 100, c = tr_features, cmap=plt.cm.coolwarm )
plt.plot()

points=tf.Variable(data)
cluster_assignments = tf.Variable(tf.zeros([N], dtype=tf.int64))

sess = tf.Session()
# tf.initialize_all_variables() is deprecated; global_variables_initializer()
# is its documented replacement and initializes the same variables.
sess.run(tf.global_variables_initializer())

test=[]
Example #33
0
import numpy as np
import matplotlib.pyplot as plt
# make_circles is imported from sklearn.datasets: the former
# sklearn.datasets.samples_generator module was removed in scikit-learn 0.24.
from sklearn.datasets import make_circles

# 1000 noisy samples on two concentric circles; fixed seed for repeatability.
x, y = make_circles(
    n_samples=1000,
    noise=0.1,
    factor=0.2,
    random_state=0
)

#x.shape

# plt.figure(figsize=(5,5))
# # 'o' = marker shape, 'b' = color
# plt.plot(x[y==0,0],x[y==0,1],'ob',alpha=0.5)
# plt.plot(x[y==1,0],x[y==1,1],'xr',alpha=0.5)
# plt.xlim(-1.5,1.5)
# plt.ylim(-1.5,1.5)
# plt.legend(['0','1'])
# plt.title("Criar um grafico")
# #plt.show()

from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import SGD

# Small network: 2 inputs -> 4 tanh units -> 1 sigmoid output, trained with
# SGD on binary cross-entropy and reporting accuracy.
model = Sequential()
model.add(Dense(4,input_shape=(2,),activation='tanh'))
model.add(Dense(1,activation='sigmoid'))
model.compile(SGD(lr=0.5),'binary_crossentropy',metrics=['accuracy'])
plot_svc_decision_function(clf)
plt.show()
# Note that a couple of the points touch the lines; these are known as our
# support vectors.
# print() is called as a function so the line is valid on Python 3 as well
# as Python 2.
print(clf.support_vectors_)
# Visually check the concordance between the coordinates printed above
# and the highlighted points in the figure.
plt.scatter(x[:, 0], x[:, 1], c=y, s=50, cmap="spring")
plot_svc_decision_function(clf)
plt.scatter(clf.support_vectors_[:, 0], clf.support_vectors_[:, 1], s=200, facecolors="none")
plt.show()
# Only the support vectors matter with an SVM. Moving points without letting
# them cross the decision boundaries would have no effect.
# The SVM becomes more powerful in conjunction with kernels. Let's look
# at some data which is not linearly separable.
x, y = make_circles(100, factor=0.1, noise=0.1)
clf = SVC(kernel="linear").fit(x, y)

plt.scatter(x[:, 0], x[:, 1], c=y, s=50, cmap="spring")
plot_svc_decision_function(clf)
plt.show()
# Clearly no linear separation is going to work on this data.
# One way we can adjust to this data is to apply a kernel, which is some
# transformation of the input data. We could use the radial basis function.
r = np.exp(-(x[:, 0] ** 2 + x[:, 1] ** 2))
# If we plot this alongside our data, we can see the effect.

def plot_3D(elev=30, azim=30):
    ax = plt.subplot(projection="3d")
    ax.scatter3D(x[:, 0], x[:, 1], r, c=y, s=50, cmap="spring")