# Example 1
    def UnBiasedHSIC(self, x, y, kernelx=None, kernely=None):
        """
        Compute the unbiased estimator of HSIC.

        Args:
            x: The data.
            y: The labels.
            kernelx: The kernel on the data; defaults to a linear kernel.
                Constructed lazily per call instead of once at
                class-definition time, so the default instance is no
                longer shared across all calls.
            kernely: The kernel on the labels, same default behavior.

        Returns:
            HSIC score.
        """
        if kernelx is None:
            kernelx = vector.CLinearKernel()
        if kernely is None:
            kernely = vector.CLinearKernel()

        nx = x.shape
        ny = y.shape
        assert nx[0] == ny[0], \
            "Argument 1 and 2 have different number of data points"

        # The unbiased estimator requires zeroed kernel diagonals.
        kMat = kernelx.Dot(x, x)
        setdiag0(kMat)

        lMat = kernely.Dot(y, y)
        setdiag0(lMat)

        sK = kMat.sum(axis=1)
        ssK = sK.sum()
        sL = lMat.sum(axis=1)
        ssL = sL.sum()

        # In-place products (kMat and sK are scratch from here on) keep
        # memory usage down; `*=` replaces the unidiomatic __imul__ calls.
        kMat *= lMat
        sK *= sL
        n = nx[0]
        return (kMat.sum()
                + (ssK * ssL) / ((n - 1) * (n - 2))
                - 2 * sK.sum() / (n - 2)
                ) / (n * (n - 3))
# Example 2
    def BiasedHSIC(self, x, y, kernelx=None, kernely=None):
        """
        Compute the biased estimator of HSIC.

        Args:
            x: The data (2-D matrix, or 1-D values).
            y: The labels, forwarded to ComputeHLH.
            kernelx: The kernel on the data; defaults to a linear kernel
                (constructed lazily instead of at class-definition time).
            kernely: The kernel on the labels; same default behavior.

        Returns:
            The biased HSIC score: sum(K * HLH) / (n - 1)^2.
        """
        nx = x.shape
        ny = y.shape
        assert nx[0] == ny[0], \
            "Argument 1 and 2 have different number of data points"

        # 1-D input has no feature axis, so fall back to a plain outer
        # product instead of a kernel evaluation.
        if len(nx) > 1:
            if kernelx is None:
                kernelx = vector.CLinearKernel()
            kMat = kernelx.Dot(x, x)
        else:
            # BUG FIX: numpy has no `outerproduct`; the function is numpy.outer.
            kMat = numpy.outer(x, x)

        if kernely is None:
            kernely = vector.CLinearKernel()
        # BUG FIX: ComputeHLH is a method of this class; it was called as a
        # bare name, which raises NameError at runtime.
        hlhMat = self.ComputeHLH(y, kernely)
        # One full-array sum is equivalent to the nested numpy.sum(numpy.sum(...)).
        return numpy.sum(kMat * hlhMat) / ((nx[0] - 1) * (nx[0] - 1))
# Example 3
    def ComputeHLH(self, y, kernely=None):
        """
        Compute the centered label kernel matrix H L H.

        H = I - (1/n) * ones is the centering matrix and L is the kernel
        matrix on the labels; H L H is expanded algebraically as
        L - (sL 1^T + 1 sL^T)/n + ssL/n^2, avoiding explicit matrix products.

        Args:
            y: The labels (2-D data matrix, or 1-D values).
            kernely: The kernel on the labels; defaults to a linear kernel
                (constructed lazily instead of at class-definition time).

        Returns:
            The n-by-n matrix H L H.
        """
        ny = y.shape
        if len(ny) > 1:
            if kernely is None:
                kernely = vector.CLinearKernel()
            lMat = kernely.Dot(y, y)
        else:
            # BUG FIX: numpy has no `outerproduct`; the function is numpy.outer.
            lMat = numpy.outer(y, y)

        sL = numpy.sum(lMat, axis=1)
        ssL = numpy.sum(sL)
        # hlhMat
        return lMat - numpy.add.outer(sL, sL) / ny[0] + ssL / (ny[0] * ny[0])
# Example 4
    def BiasedHSIC(self, x, y, kernelx=None, kernely=None):
        """
        Compute the biased estimator of HSIC.

        Args:
            x: The data (2-D matrix, or 1-D values).
            y: The labels, forwarded to ComputeHLH.
            kernelx: The kernel on the data; defaults to a linear kernel
                (constructed lazily instead of at class-definition time).
            kernely: The kernel on the labels; same default behavior.

        Returns:
            The biased HSIC score: sum(K * HLH) / (n - 1)^2.
        """
        nx = x.shape
        ny = y.shape
        assert nx[0] == ny[0], \
            "Argument 1 and 2 have different number of data points"

        # 1-D input has no feature axis, so fall back to a plain outer
        # product instead of a kernel evaluation.
        if len(nx) > 1:
            if kernelx is None:
                kernelx = vector.CLinearKernel()
            kMat = kernelx.Dot(x, x)
        else:
            # BUG FIX: numpy has no `outerproduct`; the function is numpy.outer.
            kMat = numpy.outer(x, x)

        if kernely is None:
            kernely = vector.CLinearKernel()
        hlhMat = self.ComputeHLH(y, kernely)
        # One full-array sum is equivalent to the nested numpy.sum(numpy.sum(...)).
        return numpy.sum(kMat * hlhMat) / ((nx[0] - 1) * (nx[0] - 1))
# Example 5
    def UnBiasedHSIC(self, x, y, kernelx=None, kernely=None):
        """
        Compute the unbiased estimator of HSIC.

        Args:
            x: The data.
            y: The labels.
            kernelx: The kernel on the data; defaults to a linear kernel
                (constructed lazily per call instead of once at
                class-definition time).
            kernely: The kernel on the labels; same default behavior.

        Returns:
            HSIC score.
        """
        if kernelx is None:
            kernelx = vector.CLinearKernel()
        if kernely is None:
            kernely = vector.CLinearKernel()

        nx = x.shape
        ny = y.shape
        assert nx[0] == ny[0], \
            "Argument 1 and 2 have different number of data points"

        # The unbiased estimator requires zeroed kernel diagonals.
        kMat = kernelx.Dot(x, x)
        setdiag0(kMat)

        lMat = kernely.Dot(y, y)
        setdiag0(lMat)

        sK = kMat.sum(axis=1)
        ssK = sK.sum()
        sL = lMat.sum(axis=1)
        ssL = sL.sum()

        # In-place products (kMat and sK are scratch from here on) keep
        # memory usage down; `*=` replaces the unidiomatic __imul__ calls.
        kMat *= lMat
        sK *= sL
        n = nx[0]
        return (kMat.sum()
                + (ssK * ssL) / ((n - 1) * (n - 2))
                - 2 * sK.sum() / (n - 2)
                ) / (n * (n - 3))
# Example 6
    def ComputeHLH(self, y, kernely=None):
        """
        Compute the centered label kernel matrix H L H.

        H = I - (1/n) * ones is the centering matrix and L is the kernel
        matrix on the labels; H L H is expanded algebraically as
        L - (sL 1^T + 1 sL^T)/n + ssL/n^2, avoiding explicit matrix products.

        Args:
            y: The labels (2-D data matrix, or 1-D values).
            kernely: The kernel on the labels; defaults to a linear kernel
                (constructed lazily instead of at class-definition time).

        Returns:
            The n-by-n matrix H L H.
        """
        ny = y.shape
        if len(ny) > 1:
            if kernely is None:
                kernely = vector.CLinearKernel()
            lMat = kernely.Dot(y, y)
        else:
            # BUG FIX: numpy has no `outerproduct`; the function is numpy.outer.
            lMat = numpy.outer(y, y)

        sL = numpy.sum(lMat, axis=1)
        ssL = numpy.sum(sL)
        # hlhMat
        return lMat - numpy.add.outer(sL, sL) / ny[0] + ssL / (ny[0] * ny[0])
# Example 7
def main():
    X = load('xdata.h5')
    y1 = load('y1data.h5')
    y2 = load('y2data.h5')
    y3 = load('y3data.h5')
    y4 = load('y4data.h5')
    # print X.shape
    # print y1.shape

    data_no = X.shape[0]

    y1 = normalize(y1, data_no)
    y2 = normalize(y2, data_no)
    y3 = normalize(y3, data_no)
    y4 = normalize(y4, data_no)

    # print y

    # Normalize the data.
    m = X.mean(0)
    s = X.std(0)
    # print m,s
    X.__isub__(m).__idiv__(s)

    # foo=np.copy(X)

    xlabels = []
    ylabels = []

    foo = np.copy(X)
    for features_tokeep in range(5000, 10001, 1000):
        X = np.copy(foo)
        xlabels.append(features_tokeep)
        print xlabels
        bahsic = CBAHSIC()
        bhs = bahsic.BAHSICRaw(X, y1, vector.CLinearKernel(),
                               vector.CLinearKernel(), features_tokeep, 0.1)
        hsicfeatures = np.zeros(shape=(data_no, features_tokeep))

        for i in range(0, data_no):
            for j in range(0, features_tokeep):
                hsicfeatures[i][j] = X[i][bhs[features_tokeep + j]]

        X = hsicfeatures

        print X.shape

        # y1=y1.reshape((y1.shape[0],))
        # print y1.shape

        #feature extraction
        # test train split
        # xlabels=[]
        # ylabels=[]
        # y1labels=[]
        # y2labels=[]
        # y3labels=[]
        # y4labels=[]

        # pca = PCA(n_components=pcanum)
        # X=pca.fit_transform(foo)
        # print X.shape

        labels = list(zip(y1, y2, y3, y4))
        xtrain, xtest, ytrain, ytest = train_test_split(X,
                                                        labels,
                                                        test_size=0.33,
                                                        random_state=42)

        tmp = zip(*ytest)
        ytest1 = tmp[0]
        ytest2 = tmp[1]
        ytest3 = tmp[2]
        ytest4 = tmp[3]
        ytest1 = np.array(ytest1)
        ytest2 = np.array(ytest2)
        ytest3 = np.array(ytest3)
        ytest4 = np.array(ytest4)

        tmp = zip(*ytrain)
        ytrain1 = tmp[0]
        ytrain2 = tmp[1]
        ytrain3 = tmp[2]
        ytrain4 = tmp[3]
        ytrain1 = np.array(ytrain1)
        ytrain2 = np.array(ytrain2)
        ytrain3 = np.array(ytrain3)
        ytrain4 = np.array(ytrain4)

        # clf=SVR(kernel='rbf')
        clf = LinearRegression()
        clf.fit(xtrain, ytrain1)
        vals1 = clf.predict(xtest)
        # print vals1
        # print accuracyregression(ytest1,vals1)

        # clf=SVR(kernel='linear',C=1e3)
        clf.fit(xtrain, ytrain2)
        vals2 = clf.predict(xtest)
        # print accuracyregression(ytest2,vals2)

        # clf=SVR(kernel='linear',C=1e3)
        clf.fit(xtrain, ytrain3)
        vals3 = clf.predict(xtest)
        # print accuracyregression(ytest3,vals3)

        # clf=SVR(kernel='linear',C=1e3)
        clf.fit(xtrain, ytrain4)
        vals4 = clf.predict(xtest)
        # print accuracyregression(ytest4,vals4)

        truth = np.zeros((4, len(ytest1)))
        predicted = np.zeros((4, len(vals1)))
        for j in range(len(ytest1)):
            truth[0][j] = ytest1[j]
            predicted[0][j] = vals1[j]
            truth[1][j] = ytest2[j]
            predicted[1][j] = vals2[j]
            truth[2][j] = ytest3[j]
            predicted[2][j] = vals3[j]
            truth[3][j] = ytest4[j]
            predicted[3][j] = vals4[j]

        ylabels.append(accuracyclassification(truth, predicted))
        # print accuracyclassification(truth,predicted)

    print xlabels
    print ylabels
    plt.plot(xlabels, ylabels)
    plt.savefig("classificationbasic1.png")
        # NOTE(review): orphaned fragment — this code is indented as a loop
        # body but follows top-level statements; its enclosing definition is
        # not visible in this chunk, so it is documented in place only.

        # Normalize the labels.
        # Presumably y holds +/-1 class labels, making pno/nno the
        # positive/negative class counts — TODO confirm against the caller.
        y = 1.0 * y
        tmp_no = numpy.sum(y)
        pno = (data_no + tmp_no) / 2
        nno = (data_no - tmp_no) / 2
        y[y > 0] = y[y > 0] / pno
        y[y < 0] = y[y < 0] / nno

        # Normalize the data.
        # Standardize in place (mean 0, std 1 per feature) via the
        # in-place dunder chain.
        m = X.mean(0)
        s = X.std(0)
        X.__isub__(m).__idiv__(s)

        # Time the BAHSIC feature ranking.
        # NOTE(review): time.clock() was removed in Python 3.8;
        # time.perf_counter() would be needed on modern Python.
        t1 = time.clock()
        tmp = bahsic.BAHSICRaw(X, y, vector.CLinearKernel(),
                               vector.CLinearKernel(), features_tokeep, 0.1)
        t2 = time.clock()
        print "time taken: " + str(t2 - t1)
        print '--rank of the features'
        print '--better features towards the end of the list:'
        print tmp

        # Keep the top `features_tokeep` features — the tail of the
        # ranking list `tmp`.
        hsicfeatures = numpy.zeros(shape=(data_no, features_tokeep))
        for i in range(0, data_no):
            for j in range(0, features_tokeep):
                hsicfeatures[i][j] = X[i][tmp[features_tokeep + j]]

        numpy.savetxt(file_out, hsicfeatures)
        # NOTE(review): `sys.argv == 5` compares a list to an int and is
        # always False; `len(sys.argv) == 5` was probably intended.
        if (sys.argv == 5):
            numpy.savetxt('original.csv', X)