def create_tr_vl_ts_nc(path, n):
    """Build the nested-circles dataset, split it and store it under `path`.

    The samples returned by `generate_nested_circles(n)` are split, in the
    order they are returned, into:
      - train: the first 2/3 of the samples,
      - valid: the following 1/6,
      - test:  the remainder.
    One figure per split is saved in `path`, together with `nc.pkl`, a
    pickled dict with the keys trainx, trainy, validx, validy, testx, testy.

    path: output directory; created when it does not exist.
    n: number of samples to generate.
    """
    if not os.path.exists(path):
        os.makedirs(path)
    x, y = generate_nested_circles(n)
    nbr = x.shape[0]
    # Split boundaries: [0, l1) train, [l1, l2) valid, [l2, nbr) test.
    l1 = int(nbr*2/3.)
    l2 = int(nbr * ((2/3.) + 1/6.))
    trainx, trainy = x[:l1, :], y[:l1]
    validx, validy = x[l1:l2, :], y[l1:l2]
    testx, testy = x[l2:, :], y[l2:]
    # The size of each split is appended to the figure title.
    trfig = plot_classes(
        trainy, trainx, "", 0., "g.tr 2D: nested circles." + str(l1))
    vlfig = plot_classes(
        validy, validx, "", 0., "g.vl 2D: nested circles." + str(l2-l1))
    tsfig = plot_classes(
        testy, testx, "", 0., "g.tst 2D: nested circles." + str(y.size - l2))
    trfig.savefig(path + "/traingfig.png", bbox_inches='tight')
    vlfig.savefig(path + "/validfig.png", bbox_inches='tight')
    tsfig.savefig(path + "/testfig.png", bbox_inches='tight')
    # dump
    # HIGHEST_PROTOCOL is a binary pickle format, so the file must be opened
    # in binary mode ("w" corrupts the stream on some platforms and fails
    # outright on Python 3).
    with open(path+"/nc.pkl", "wb") as f:
        stuff = {"trainx": trainx, "trainy": trainy,
                 "validx": validx, "validy": validy,
                 "testx": testx, "testy": testy}
        pkl.dump(stuff, f, protocol=pkl.HIGHEST_PROTOCOL)
def generate_2d_checkboard(x_born, y_born, s, ss):
    """Generate a shuffled two-class 2D dataset laid out on a grid of cells.

    x_born: [min, max] bounds along the first axis, e.g. [-1, 1].
    y_born: [min, max] bounds along the second axis, e.g. [-1, 1].
    s: number of grid lines per axis, e.g. 10; this produces (s - 1) cells
       per axis.
    ss: number of sample points per axis inside each cell, e.g. 20; each
        cell therefore contributes ss * ss points.

    Returns (x, y): x has one row [x0, x1] per point, y holds the matching
    labels as floats (1. / 0.). The rows are shuffled with the global numpy
    random state. The full dataset is also plotted and saved to
    "data/2d/cb2d_generated.png".

    NOTE: the grid lines are built with endpoint=False and only consecutive
    pairs of lines form a cell, so the last strip before the upper bound of
    each axis is not covered.
    NOTE: the label flips after every cell in traversal order, which gives a
    real checkerboard only when (s - 1) is odd, i.e. when s is even.
    """
    linex = np.linspace(x_born[0], x_born[1], s, endpoint=False)
    liney = np.linspace(y_born[0], y_born[1], s, endpoint=False)
    points, labels = [], []
    start_y = True
    for lx, lxnext in zip(linex[:-1], linex[1:]):
        for ly, lynext in zip(liney[:-1], liney[1:]):
            linexx = np.linspace(lx, lxnext, ss, endpoint=False)
            lineyy = np.linspace(ly, lynext, ss, endpoint=False)
            xv, yv = np.meshgrid(linexx, lineyy)
            # Row-major flattening visits the grid in the same (i, j) order
            # as an explicit double loop over the mesh.
            points.append(np.column_stack((xv.ravel(), yv.ravel())))
            labels.extend([start_y] * xv.size)
            start_y = not start_y

    y = np.array(labels) * 1.
    x = np.vstack(points)
    # Stack the labels as a last column so points and labels are shuffled
    # together.
    mega = np.hstack((x, y.reshape(y.size, 1)))
    # The number of shuffles is kept as is so the random stream consumed by
    # this function does not change.
    for i in range(500):
        np.random.shuffle(mega)
        print(i)
    x = mega[:, :-1]
    y = mega[:, -1]
    print("%s %s" % (x.shape, y.shape))
    fig = plot_classes(y, x, "", 0., "generated 2D: checkboard.")
    fig.savefig("data/2d/cb2d_generated.png", bbox_inches='tight')
    return x, y
def create_tr_vl_ts_cb(path):
    """Build the checkerboard dataset, split it and store it under `path`.

    The samples returned by
    `generate_2d_checkboard([-1, 1], [-1, 1], 10, 20)` are split, in the
    order they are returned, into:
      - train: the first 2/3 of the samples,
      - valid: the following 1/6,
      - test:  the remainder.
    One figure per split is saved in `path`, together with `cb.pkl`, a
    pickled dict with the keys trainx, trainy, validx, validy, testx, testy.

    path: output directory; created when it does not exist.
    """
    if not os.path.exists(path):
        os.makedirs(path)
    x, y = generate_2d_checkboard([-1, 1], [-1, 1], 10, 20)
    nbr = x.shape[0]
    # Split boundaries: [0, l1) train, [l1, l2) valid, [l2, nbr) test.
    l1 = int(nbr*2/3.)
    l2 = int(nbr * ((2/3.) + 1/6.))
    trainx, trainy = x[:l1, :], y[:l1]
    validx, validy = x[l1:l2, :], y[l1:l2]
    testx, testy = x[l2:, :], y[l2:]
    trfig = plot_classes(trainy, trainx, "", 0., "g.tr 2D: checkboard.")
    vlfig = plot_classes(validy, validx, "", 0., "g.vl 2D: checkboard.")
    tsfig = plot_classes(testy, testx, "", 0., "g.tst 2D: checkboard.")
    trfig.savefig(path + "/traingfig.png", bbox_inches='tight')
    vlfig.savefig(path + "/validfig.png", bbox_inches='tight')
    tsfig.savefig(path + "/testfig.png", bbox_inches='tight')
    # dump
    # HIGHEST_PROTOCOL is a binary pickle format, so the file must be opened
    # in binary mode ("w" corrupts the stream on some platforms and fails
    # outright on Python 3).
    with open(path+"/cb.pkl", "wb") as f:
        stuff = {"trainx": trainx, "trainy": trainy,
                 "validx": validx, "validy": validy,
                 "testx": testx, "testy": testy}
        pkl.dump(stuff, f, protocol=pkl.HIGHEST_PROTOCOL)
def generate_all_2d_data(path):
    """Generate train/valid/test two-Gaussian 2D data and save their plots.

    Three independent draws are made with `generate_2d_data_bin` (25000
    samples per class for train, 5000 per class for valid and for test).
    Every split is min-max scaled with the per-feature minimum and maximum
    of the *training* split, then plotted; the figures are saved in `path`.

    path: output directory; created when it does not exist.
    """
    if not os.path.exists(path):
        os.makedirs(path)
    mn1, cov1 = [1, 0], [[1, -0.5], [-0.5, 1]]
    mn2, cov2 = [4, 0], [[1, 0], [0, 1]]
    title = "generated 2D: multivariate normal."

    # The figure returned by the generator shows unscaled data, so it is
    # dropped and the split is plotted again after scaling.
    trainx, trainy, _ = generate_2d_data_bin(25000, mn1, cov1, mn2, cov2)
    # Scaling statistics come from the training split only and are reused
    # for the other splits.
    minx = np.min(trainx, axis=0)
    maxx = np.max(trainx, axis=0)
    trainx = (trainx - minx)/(maxx - minx)
    trainfig = plot_classes(trainy, trainx, "", 0., title)

    validx, validy, _ = generate_2d_data_bin(5000, mn1, cov1, mn2, cov2)
    validx = (validx - minx)/(maxx - minx)
    # Plot the validation split itself (not the training split).
    validfig = plot_classes(validy, validx, "", 0., title)

    testx, testy, _ = generate_2d_data_bin(5000, mn1, cov1, mn2, cov2)
    # Scale the test split like the other two so the three figures are
    # comparable.
    testx = (testx - minx)/(maxx - minx)
    testfig = plot_classes(testy, testx, "", 0., title)

    trainfig.savefig(path + "/traingfig.png", bbox_inches='tight')
    validfig.savefig(path + "/validfig.png", bbox_inches='tight')
    testfig.savefig(path + "/testfig.png", bbox_inches='tight')
def generate_2d_data_bin(nbr, mn1, cov1, mn2, cov2):
    """Generate a shuffled two-class dataset from two multivariate normals.

    nbr: number of samples per class.
    mn1, cov1: mean and covariance of class 0.
    mn2, cov2: mean and covariance of class 1.

    Returns (x, y, fig): x has one sample per row (2 * nbr rows), y holds the
    matching labels (0. or 1.) and fig is the figure returned by
    `plot_classes` for this data. Sampling and shuffling use the global
    numpy random state.
    """
    x1 = np.random.multivariate_normal(mn1, cov1, nbr)
    x2 = np.random.multivariate_normal(mn2, cov2, nbr)
    y1 = np.zeros((nbr, 1), dtype=np.float32)
    y2 = np.ones((nbr, 1), dtype=np.float32)
    x = np.vstack((x1, x2))
    y = np.vstack((y1, y2))
    print("%s %s" % (x.shape, y.shape))
    # y is already a (2 * nbr, 1) column; stacking it next to x lets samples
    # and labels be shuffled together.
    mega = np.hstack((x, y))
    # The number of shuffles is kept as is so the random stream consumed by
    # this function does not change.
    for _ in range(100):
        np.random.shuffle(mega)
    x = mega[:, :-1]
    y = mega[:, -1]
    fig = plot_classes(y, x, "", 0., "generated 2D: multivariate normal.")
    return x, y, fig
def generate_nested_circles(n):
    """Generate a shuffled two-class dataset of nested rings.

    `n` points are drawn uniformly in the square [-1, 1) x [-1, 1) and
    labelled from their Euclidean distance d to the origin:
      0   < d <= 1/3 -> 1
      1/3 < d <= 2/3 -> 0
      2/3 < d <= 1   -> 1
      1   < d <= 2   -> 0
    (a point with d == 0 keeps the initial label 0).

    n: number of samples to generate.

    Returns (x, y): x has shape (n, 2), y holds the labels as floats. The
    dataset is also plotted and saved to
    "data/nestedcircle/nc_generated.png".

    Side effect: the global numpy random generator is re-seeded with 0, so
    the output is the same on every call.
    """
    limits = [0, 1./3, 2./3, 1, 2]
    np.random.seed(0)
    X = np.random.rand(n, 2)*2-1
    Xd = np.sqrt((X**2).sum(axis=1))
    Y = np.zeros((n, ), dtype='bool')
    classe = True
    # Walk the rings from the centre outwards, alternating the class.
    for b1, b2 in zip(limits[:-1], limits[1:]):
        in_ring = np.logical_and(b1 < Xd, Xd <= b2)
        Y[in_ring] = classe
        classe = not classe
    Y = Y.astype(np.float32)
    # Stack the labels as a last column so points and labels are shuffled
    # together.
    mega = np.hstack((X, Y.reshape(Y.size, 1)))
    # The number of shuffles is kept as is: with the fixed seed, changing it
    # would change the order of the generated samples.
    for i in range(500):
        np.random.shuffle(mega)
        print(i)
    x = mega[:, :-1]
    y = mega[:, -1]
    print("%s %s" % (x.shape, y.shape))
    fig = plot_classes(y, x, "", 0., "generated 2D: nested circles.")
    fig.savefig("data/nestedcircle/nc_generated.png", bbox_inches='tight')
    return x, y