import functools

import numpy as np


def softmax_regression(size, classes, lamda, data, labels, iters, validator, a=0.3):
    # Initialize theta with small random values so the initial class scores
    # are close to uniform; the 0.005 scale appears to follow the UFLDL
    # starter code and is not otherwise special.
    theta = 0.005 * np.random.rand(size * classes)

    # Wrap the fixed hyperparameters so the optimizer only has to supply
    # (data, theta) to the gradient function.
    grad_func = functools.partial(softmax_gradient,
                                  input_size=size,
                                  num_classes=classes,
                                  input_labels=labels,
                                  lmd=lamda)

    # Minimize with L-BFGS.
    W = minfunc.lbfgs(data, theta, grad_func, validator, iters, alpha=a)
    return W
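

# A minimal, self-contained sketch of the cost/gradient pair that
# softmax_gradient is expected to return (softmax cross-entropy with L2
# weight decay). The name, the flat-theta layout, and the (input_size, m)
# data orientation are illustrative assumptions, not the repo's verified
# implementation.
def _softmax_cost_grad_sketch(theta, data, labels, input_size, num_classes, lmd):
    W = theta.reshape(num_classes, input_size)   # unflatten parameters
    scores = W.dot(data)                         # (num_classes, m)
    scores -= scores.max(axis=0)                 # stabilize exp
    probs = np.exp(scores)
    probs /= probs.sum(axis=0)                   # column-wise softmax
    m = data.shape[1]
    onehot = np.zeros_like(probs)                # one-hot ground truth
    onehot[labels, np.arange(m)] = 1.0
    cost = -np.sum(onehot * np.log(probs)) / m + 0.5 * lmd * np.sum(W * W)
    grad = -(onehot - probs).dot(data.T) / m + lmd * W
    return cost, grad.flatten()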


def sparseAutoencoder(vSize, hSize, spars, lamda, b, a, data, iters=400):
    # Step 1: initialize (W1, W2, b1, b2) and flatten them into a single
    # parameter vector, the layout the optimizer expects.
    W1, W2, b1, b2 = initializeParameters(hSize, vSize)
    theta = np.r_[W1.flatten(), W2.flatten(), b1.flatten(), b2.flatten()]

    # Step 2: wrap the fixed gradient-function parameters.
    grad_func = functools.partial(sparseAutoencoderGradient,
                                  visibleSize=vSize,
                                  hiddenSize=hSize,
                                  sparsityParam=spars,
                                  lmd=lamda,
                                  beta=b)

    # Step 3: minimize with L-BFGS.
    W = minfunc.lbfgs(data, theta, grad_func, alpha=a, max_iter=iters)
    return W
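

# A sketch of the initialization that initializeParameters is presumed to
# perform, following the UFLDL convention: weights drawn uniformly from
# [-r, r] with r = sqrt(6 / (hiddenSize + visibleSize + 1)) and zero biases.
# The exact scheme used by the repo's helper is an assumption here.
def _initialize_parameters_sketch(hidden_size, visible_size):
    r = np.sqrt(6.0 / (hidden_size + visible_size + 1))
    W1 = np.random.rand(hidden_size, visible_size) * 2 * r - r  # encoder weights
    W2 = np.random.rand(visible_size, hidden_size) * 2 * r - r  # decoder weights
    b1 = np.zeros(hidden_size)                                  # hidden biases
    b2 = np.zeros(visible_size)                                 # output biases
    return W1, W2, b1, b2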