예제 #1
0
def test_pyrdc_vs_Rrdc_one_hot_enc():
    """Print the R (rank / ecdf) and Python ``rdc`` values on one-hot rows.

    Two 20 x 10 matrices are built in which every row holds a single 1 at a
    column picked by a ``RandomState`` seeded with 1337.  For each k in a
    fixed list, ``(cols + 1) * k`` normals are drawn through R's ``rnorm``;
    the flat draws go to the two R functions and the reshaped draws go to the
    Python ``rdc``.  Nothing is asserted: the three values are only printed
    so they can be compared by eye.
    """
    numpy2ri.activate()

    seeded = numpy.random.RandomState(1337)

    n_rows = 20
    x_width = 10
    y_width = 10

    # X: one randomly chosen column is switched on in each row
    X_rand = numpy.zeros((n_rows, x_width))
    for x_row in X_rand:
        x_row[seeded.choice(x_width)] = 1
    print(X_rand)

    # Y: same construction, drawn after X from the same generator
    Y_rand = numpy.zeros((n_rows, y_width))
    for y_row in Y_rand:
        y_row[seeded.choice(y_width)] = 1
    print(Y_rand)

    print('X: {}, Y: {}'.format(X_rand.shape, Y_rand.shape))

    for k in (1, 2, 10, 20, 100, 200):
        print('\nConsidering k: {}'.format(k))

        # the normals come from R so that both sides see the same draws
        r_normal = robjects.r["rnorm"]
        x_draws = r_normal((x_width + 1) * k)
        # NOTE(review): numpy reshapes row-major; confirm this matches how
        # the R code lays out the flat vector
        x_draws_2d = numpy.array(x_draws).reshape(x_width + 1, k)
        y_draws = r_normal((y_width + 1) * k)
        y_draws_2d = numpy.array(y_draws).reshape(y_width + 1, k)
        shapes_msg = "\trnorm X: {}, rnorm Y: {}".format(x_draws_2d.shape,
                                                         y_draws_2d.shape)
        print(shapes_msg)

        # R implementation evaluated from the RRDC_rank source string
        r_rank_fn = robjects.r(RRDC_rank)
        r_rank_result = r_rank_fn(X_rand, Y_rand, x_draws, y_draws, k=k)
        print("\tR (rank) value: {}".format(r_rank_result))

        # R implementation evaluated from the RRDC_ecdf source string
        r_ecdf_fn = robjects.r(RRDC_ecdf)
        r_ecdf_result = r_ecdf_fn(X_rand, Y_rand, x_draws, y_draws, k=k)
        print("\tR (ecdf) value: {}".format(r_ecdf_result))

        # Python implementation, fed the reshaped copies of the same draws
        py_result = rdc(X_rand, Y_rand,
                        rnorm_X=x_draws_2d, rnorm_Y=y_draws_2d)
        print("\tPython value: {}".format(py_result))
예제 #2
0
def test_pyrdc_vs_Rrdc_normal_dependent_data():
    """Print the R (rank / ecdf) and Python ``rdc`` values on dependent data.

    X is a 5000 x 100 sample from a normal distribution (loc 10, scale 1.0)
    drawn from a ``RandomState`` seeded with 1337.  Y is the deterministic
    affine image ``alpha * X + beta``, so every Y column is fully determined
    by the matching X column.  For each k in a fixed list, ``(cols + 1) * k``
    normals are drawn through R's ``rnorm`` and shared between the two R
    functions (flat vector) and the Python ``rdc`` (reshaped matrix).
    Nothing is asserted: the values are only printed for comparison.
    """
    numpy2ri.activate()

    #
    # generate X from a single gaussian
    rand_gen = numpy.random.RandomState(1337)

    n_instances = 5000
    X_cols = 100
    X_size = (n_instances, X_cols)
    X_loc = 10
    # passed as `scale`, i.e. a standard deviation rather than a variance
    X_std = 1.0
    X_rand = rand_gen.normal(size=X_size, loc=X_loc, scale=X_std)

    #
    # Y is an affine function of X, not an independent sample
    Y_cols = 100
    alpha = 3.0
    beta = 115
    Y_rand = alpha * X_rand + beta

    print('X: {}, Y: {}'.format(X_rand.shape, Y_rand.shape))

    # Y_cols and X_cols are both 100, so that k is evaluated twice
    k_values = [
        1, 2, 10, 20, Y_cols, X_cols, X_cols + Y_cols, (X_cols + Y_cols) * 2
    ]

    for k in k_values:
        print('\nConsidering k: {}'.format(k))
        #
        # generate random normals through R
        rnorm = robjects.r["rnorm"]
        rnorm_X = rnorm((X_cols + 1) * k)
        # NOTE(review): numpy reshapes row-major; confirm this matches how
        # the R code lays out the flat vector
        rnorm_X_m = numpy.array(rnorm_X).reshape(X_cols + 1, k)
        rnorm_Y = rnorm((Y_cols + 1) * k)
        rnorm_Y_m = numpy.array(rnorm_Y).reshape(Y_cols + 1, k)
        print("\trnorm X: {}, rnorm Y: {}".format(rnorm_X_m.shape,
                                                  rnorm_Y_m.shape))

        #
        # evaluate the R source held in RRDC_rank and call the result
        Rrdc_rank = robjects.r(RRDC_rank)

        Rrdc_rank_value = Rrdc_rank(X_rand, Y_rand, rnorm_X, rnorm_Y, k=k)
        print("\tR (rank) value: {}".format(Rrdc_rank_value))

        #
        # evaluate the R source held in RRDC_ecdf and call the result
        Rrdc_ecdf = robjects.r(RRDC_ecdf)

        Rrdc_ecdf_value = Rrdc_ecdf(X_rand, Y_rand, rnorm_X, rnorm_Y, k=k)
        print("\tR (ecdf) value: {}".format(Rrdc_ecdf_value))

        #
        # Python implementation, fed the reshaped copies of the same draws
        pyrdc_value = rdc(X_rand, Y_rand, rnorm_X=rnorm_X_m, rnorm_Y=rnorm_Y_m)
        print("\tPython value: {}".format(pyrdc_value))
예제 #3
0
def test_pyrdc_vs_Rrdc_bernoulli_data_monodim():
    """Print the R (rank / ecdf) and Python ``rdc`` values on 0/1 columns.

    X and Y are 20 x 1 arrays of single-trial binomial draws (p = .68 and
    p = .22) taken, in that order, from a ``RandomState`` seeded with 1337.
    For each k in a fixed list, ``(cols + 1) * k`` normals are drawn through
    R's ``rnorm`` and shared between the two R functions (flat vector) and
    the Python ``rdc`` (reshaped matrix).  Nothing is asserted: the values
    are only printed for comparison.
    """
    numpy2ri.activate()

    seeded = numpy.random.RandomState(1337)

    n_rows = 20
    x_width = 1
    y_width = 1

    # X: one Bernoulli column, echoed as a comma-separated line
    x_prob = .68
    X_rand = seeded.binomial(n=1, p=x_prob, size=(n_rows, x_width))
    print(','.join(str(bit) for bit in X_rand[:, 0]))

    # Y: drawn after X from the same generator
    y_prob = .22
    Y_rand = seeded.binomial(n=1, p=y_prob, size=(n_rows, y_width))
    print(','.join(str(bit) for bit in Y_rand[:, 0]))

    print('X: {}, Y: {}'.format(X_rand.shape, Y_rand.shape))

    for k in (1, 2, 10, 20, 100, 200):
        print('\nConsidering k: {}'.format(k))

        # the normals come from R so that both sides see the same draws
        r_normal = robjects.r["rnorm"]
        x_draws = r_normal((x_width + 1) * k)
        # NOTE(review): numpy reshapes row-major; confirm this matches how
        # the R code lays out the flat vector
        x_draws_2d = numpy.array(x_draws).reshape(x_width + 1, k)
        y_draws = r_normal((y_width + 1) * k)
        y_draws_2d = numpy.array(y_draws).reshape(y_width + 1, k)
        shapes_msg = "\trnorm X: {}, rnorm Y: {}".format(x_draws_2d.shape,
                                                         y_draws_2d.shape)
        print(shapes_msg)

        # R implementation evaluated from the RRDC_rank source string
        r_rank_fn = robjects.r(RRDC_rank)
        r_rank_result = r_rank_fn(X_rand, Y_rand, x_draws, y_draws, k=k)
        print("\tR (rank) value: {}".format(r_rank_result))

        # R implementation evaluated from the RRDC_ecdf source string
        r_ecdf_fn = robjects.r(RRDC_ecdf)
        r_ecdf_result = r_ecdf_fn(X_rand, Y_rand, x_draws, y_draws, k=k)
        print("\tR (ecdf) value: {}".format(r_ecdf_result))

        # Python implementation, fed the reshaped copies of the same draws
        py_result = rdc(X_rand, Y_rand,
                        rnorm_X=x_draws_2d, rnorm_Y=y_draws_2d)
        print("\tPython value: {}".format(py_result))
예제 #4
0
def test_pyrdc_vs_Rrdc_bernoulli_data_monodim():
    """Print ``rdc`` for several hand-built pairs of Poisson count columns.

    After seeding numpy's global generator with 42, the function builds
    two-column arrays with different relations between the columns (identical
    columns, separately drawn columns, a sorted column against its reverse,
    and rows where one of the two entries is always 0), each in a
    single-rate and a two-rate ("mixture") variant.  It prints ``rdc`` of the
    two columns for every array and finally the result of
    ``getIndependentGroups`` on the separately drawn mixture.  Nothing is
    asserted.

    NOTE(review): the name mentions R and Bernoulli data, but the body only
    samples from ``numpy.random.poisson`` and never calls R.  If another
    function with this name lives in the same module, one definition shadows
    the other - confirm and rename.
    """
    numpy.random.seed(42)

    def as_columns(first, second):
        # two 1-D samples -> (n, 2) array holding one sample per column
        return numpy.vstack((first, second)).T

    def one_side_zero(values):
        # rows (0, v) for every v, followed by rows (v, 0) for every v
        blanks = [0] * len(values)
        zero_first = numpy.vstack((blanks, values))
        zero_second = numpy.vstack((values, blanks))
        return numpy.hstack((zero_first, zero_second)).T

    # the sampling calls below keep their original order so the global
    # generator is consumed identically
    gen = numpy.random.poisson(5, 1000)
    gen2 = numpy.random.poisson(5, 1000)
    genSorted = numpy.sort(gen)

    # two-rate samples: the rate-5 draws followed by as many rate-25 draws
    high_rate_a = numpy.random.poisson(25, len(gen))
    genmixture = numpy.hstack((gen, high_rate_a))
    high_rate_b = numpy.random.poisson(25, len(gen2))
    genmixture2 = numpy.hstack((gen2, high_rate_b))
    genmixtureSorted = numpy.sort(genmixture)

    negdependency = one_side_zero(gen)

    low_part = numpy.random.poisson(2, 1000)
    high_part = numpy.random.poisson(13, 2000)
    genmixture3 = numpy.hstack((low_part, high_part))
    negdependencymixture = one_side_zero(genmixture3)
    # keep only the rows that have at least one non-zero entry
    has_nonzero = numpy.any(negdependencymixture != 0, axis=1)
    negdependencymixture = negdependencymixture[has_nonzero]

    independent = as_columns(gen, gen2)
    independentmixture = as_columns(genmixture, genmixture2)

    opposite = as_columns(genSorted, genSorted[::-1])
    oppositemixture = as_columns(genmixtureSorted, genmixtureSorted[::-1])

    cooccur = as_columns(gen, gen)
    cooccurmixture = as_columns(genmixture, genmixture)

    print()
    # "cooccur" is reported twice on purpose: the sequence of rdc calls is
    # kept exactly as it was (rdc may consume random state - not visible here)
    report = (
        ("cooccur", cooccur),
        ("negdependency", negdependency),
        ("negdependencymixture", negdependencymixture),
        ("independent", independent),
        ("independentmixture", independentmixture),
        ("opposite", opposite),
        ("oppositemixture", oppositemixture),
        ("cooccur", cooccur),
        ("cooccurmixture", cooccurmixture),
    )
    for label, pair in report:
        print(label, rdc(pair[:, 0], pair[:, 1]))

    print(getIndependentGroups(independentmixture, 0.05, "poisson"))