def test_pyrdc_vs_Rrdc_one_hot_enc():
    """Print the R and Python RDC scores for two random one-hot matrices.

    X and Y are 20 x 10 matrices holding exactly one 1 per row, at a column
    index drawn from a ``RandomState`` seeded with 1337.  For every ``k`` the
    random normals are drawn through R's ``rnorm``; the R scripts
    (``RRDC_rank`` and ``RRDC_ecdf``) receive them as returned by ``rnorm``,
    while ``rdc`` receives them reshaped to ``(cols + 1, k)``.

    Nothing is asserted: the three scores are only printed so they can be
    compared by eye.
    """
    numpy2ri.activate()

    rng = numpy.random.RandomState(1337)
    n_rows = 20

    def _random_one_hot(width):
        # One RNG draw per row, in row order.
        encoded = numpy.zeros((n_rows, width))
        for row in encoded:
            row[rng.choice(width)] = 1
        return encoded

    X_cols = 10
    X_rand = _random_one_hot(X_cols)
    print(X_rand)

    Y_cols = 10
    Y_rand = _random_one_hot(Y_cols)
    print(Y_rand)

    print('X: {}, Y: {}'.format(X_rand.shape, Y_rand.shape))

    for k in (1, 2, 10, 20, 100, 200):
        print('\nConsidering k: {}'.format(k))

        # Draw the random normals through R so both implementations share
        # exactly the same values.
        r_normal = robjects.r["rnorm"]
        normals_x = r_normal((X_cols + 1) * k)
        # NOTE(review): numpy reshapes row-major by default; confirm the R
        # scripts lay the flat vector out the same way.
        normals_x_matrix = numpy.array(normals_x).reshape(X_cols + 1, k)
        normals_y = r_normal((Y_cols + 1) * k)
        normals_y_matrix = numpy.array(normals_y).reshape(Y_cols + 1, k)
        print("\trnorm X: {}, rnorm Y: {}".format(normals_x_matrix.shape,
                                                  normals_y_matrix.shape))

        # R reference implementation evaluated from the RRDC_rank script.
        rank_score = robjects.r(RRDC_rank)(
            X_rand, Y_rand, normals_x, normals_y, k=k)
        print("\tR (rank) value: {}".format(rank_score))

        # R reference implementation evaluated from the RRDC_ecdf script.
        ecdf_score = robjects.r(RRDC_ecdf)(
            X_rand, Y_rand, normals_x, normals_y, k=k)
        print("\tR (ecdf) value: {}".format(ecdf_score))

        python_score = rdc(X_rand, Y_rand,
                           rnorm_X=normals_x_matrix,
                           rnorm_Y=normals_y_matrix)
        print("\tPython value: {}".format(python_score))
def test_pyrdc_vs_Rrdc_normal_dependent_data():
    """Print the R and Python RDC scores for linearly dependent normal data.

    X is a 5000 x 100 sample from a normal distribution (loc 10, scale 1.0)
    drawn from a ``RandomState`` seeded with 1337.  Y is the deterministic
    affine map ``3.0 * X + 115`` of X; no noise is added.  For every ``k``
    the random normals are drawn through R's ``rnorm``; the R scripts
    (``RRDC_rank`` and ``RRDC_ecdf``) receive them as returned by ``rnorm``,
    while ``rdc`` receives them reshaped to ``(cols + 1, k)``.

    Nothing is asserted: the three scores are only printed so they can be
    compared by eye.
    """
    numpy2ri.activate()

    rand_gen = numpy.random.RandomState(1337)

    n_instances = 5000
    X_cols = 100
    X_loc = 10
    # ``scale`` is a standard deviation, hence the name.
    X_std = 1.0
    X_rand = rand_gen.normal(size=(n_instances, X_cols),
                             loc=X_loc, scale=X_std)

    # Y has the same shape as X because it is computed element-wise from it.
    Y_cols = 100
    alpha = 3.0
    beta = 115
    Y_rand = alpha * X_rand + beta

    print('X: {}, Y: {}'.format(X_rand.shape, Y_rand.shape))

    # Y_cols and X_cols are both 100, so k == 100 is visited twice; each
    # visit draws fresh normals from R.
    k_values = [
        1, 2, 10, 20, Y_cols, X_cols, X_cols + Y_cols, (X_cols + Y_cols) * 2
    ]
    for k in k_values:
        print('\nConsidering k: {}'.format(k))

        # Draw the random normals through R so both implementations share
        # exactly the same values.
        rnorm = robjects.r["rnorm"]
        rnorm_X = rnorm((X_cols + 1) * k)
        # NOTE(review): numpy reshapes row-major by default; confirm the R
        # scripts lay the flat vector out the same way.
        rnorm_X_m = numpy.array(rnorm_X).reshape(X_cols + 1, k)
        rnorm_Y = rnorm((Y_cols + 1) * k)
        rnorm_Y_m = numpy.array(rnorm_Y).reshape(Y_cols + 1, k)
        print("\trnorm X: {}, rnorm Y: {}".format(rnorm_X_m.shape,
                                                  rnorm_Y_m.shape))

        # R reference implementation evaluated from the RRDC_rank script.
        Rrdc_rank = robjects.r(RRDC_rank)
        Rrdc_rank_value = Rrdc_rank(X_rand, Y_rand, rnorm_X, rnorm_Y, k=k)
        print("\tR (rank) value: {}".format(Rrdc_rank_value))

        # R reference implementation evaluated from the RRDC_ecdf script.
        Rrdc_ecdf = robjects.r(RRDC_ecdf)
        Rrdc_ecdf_value = Rrdc_ecdf(X_rand, Y_rand, rnorm_X, rnorm_Y, k=k)
        print("\tR (ecdf) value: {}".format(Rrdc_ecdf_value))

        pyrdc_value = rdc(X_rand, Y_rand,
                          rnorm_X=rnorm_X_m, rnorm_Y=rnorm_Y_m)
        print("\tPython value: {}".format(pyrdc_value))
def test_pyrdc_vs_Rrdc_bernoulli_data_monodim():
    """Print the R and Python RDC scores for two single-column Bernoulli samples.

    X and Y are 20 x 1 samples of ``binomial(n=1, p)`` with p = .68 and
    p = .22 respectively, drawn from a ``RandomState`` seeded with 1337.  For
    every ``k`` the random normals are drawn through R's ``rnorm``; the R
    scripts (``RRDC_rank`` and ``RRDC_ecdf``) receive them as returned by
    ``rnorm``, while ``rdc`` receives them reshaped to ``(cols + 1, k)``.

    Nothing is asserted: the three scores are only printed so they can be
    compared by eye.
    """
    # NOTE(review): a second function with this exact name is defined later
    # in the file and rebinds it, so this definition would not be the one
    # collected -- confirm and rename one of the two.
    numpy2ri.activate()

    rng = numpy.random.RandomState(1337)
    n_rows = 20

    X_cols = 1
    X_theta = .68
    X_rand = rng.binomial(n=1, p=X_theta, size=(n_rows, X_cols))
    print(','.join(map(str, X_rand[:, 0])))

    Y_cols = 1
    Y_theta = .22
    Y_rand = rng.binomial(n=1, p=Y_theta, size=(n_rows, Y_cols))
    print(','.join(map(str, Y_rand[:, 0])))

    print('X: {}, Y: {}'.format(X_rand.shape, Y_rand.shape))

    for k in (1, 2, 10, 20, 100, 200):
        print('\nConsidering k: {}'.format(k))

        # Draw the random normals through R so both implementations share
        # exactly the same values.
        r_normal = robjects.r["rnorm"]
        normals_x = r_normal((X_cols + 1) * k)
        # NOTE(review): numpy reshapes row-major by default; confirm the R
        # scripts lay the flat vector out the same way.
        normals_x_matrix = numpy.array(normals_x).reshape(X_cols + 1, k)
        normals_y = r_normal((Y_cols + 1) * k)
        normals_y_matrix = numpy.array(normals_y).reshape(Y_cols + 1, k)
        print("\trnorm X: {}, rnorm Y: {}".format(normals_x_matrix.shape,
                                                  normals_y_matrix.shape))

        # R reference implementation evaluated from the RRDC_rank script.
        rank_score = robjects.r(RRDC_rank)(
            X_rand, Y_rand, normals_x, normals_y, k=k)
        print("\tR (rank) value: {}".format(rank_score))

        # R reference implementation evaluated from the RRDC_ecdf script.
        ecdf_score = robjects.r(RRDC_ecdf)(
            X_rand, Y_rand, normals_x, normals_y, k=k)
        print("\tR (ecdf) value: {}".format(ecdf_score))

        python_score = rdc(X_rand, Y_rand,
                           rnorm_X=normals_x_matrix,
                           rnorm_Y=normals_y_matrix)
        print("\tPython value: {}".format(python_score))
def test_pyrdc_vs_Rrdc_bernoulli_data_monodim():
    """Print ``rdc`` scores for pairs of Poisson columns with known structure.

    With the global numpy RNG seeded to 42, several two-column datasets are
    built from Poisson samples and from mixtures of Poisson samples:

    * co-occurring: both columns are the same sample;
    * independent: the columns are separate draws;
    * opposite: a sorted sample against its reverse;
    * negative dependency: whenever one column holds a sample value the
      other holds 0 (rows that are 0 in both columns are dropped from the
      mixture variant only).

    The ``rdc`` score of each pair is printed, followed by the result of
    ``getIndependentGroups`` on the independent mixture.  Nothing is
    asserted.
    """
    # NOTE(review): this name duplicates an earlier test in the file (and the
    # data here is Poisson, not Bernoulli).  The later definition rebinds the
    # name, so the earlier test would not be collected -- confirm and rename.
    numpy.random.seed(42)

    def _as_columns(first, second):
        # Two 1-D samples -> (n, 2) array with one sample per column.
        return numpy.vstack((first, second)).T

    def _exclusive(counts):
        # First half of the rows: (0, count); second half: (count, 0).
        padding = [0] * len(counts)
        upper = numpy.vstack((padding, counts))
        lower = numpy.vstack((counts, padding))
        return numpy.hstack((upper, lower)).T

    # The draws below happen in a fixed order; do not reorder them.
    sample_a = numpy.random.poisson(5, 1000)
    sample_b = numpy.random.poisson(5, 1000)
    mixture_a = numpy.hstack(
        (sample_a, numpy.random.poisson(25, len(sample_a))))
    mixture_b = numpy.hstack(
        (sample_b, numpy.random.poisson(25, len(sample_b))))
    mixture_c = numpy.hstack(
        (numpy.random.poisson(2, 1000), numpy.random.poisson(13, 2000)))

    sample_a_sorted = numpy.sort(sample_a)
    mixture_a_sorted = numpy.sort(mixture_a)

    negdependency = _exclusive(sample_a)
    negdependencymixture = _exclusive(mixture_c)
    # Keep only the rows that have at least one non-zero entry.
    has_value = numpy.any(negdependencymixture != 0, axis=1)
    negdependencymixture = negdependencymixture[has_value]

    independent = _as_columns(sample_a, sample_b)
    independentmixture = _as_columns(mixture_a, mixture_b)
    opposite = _as_columns(sample_a_sorted, sample_a_sorted[::-1])
    oppositemixture = _as_columns(mixture_a_sorted, mixture_a_sorted[::-1])
    cooccur = _as_columns(sample_a, sample_a)
    cooccurmixture = _as_columns(mixture_a, mixture_a)

    print()
    # "cooccur" is deliberately listed twice, matching the reported output.
    report = (
        ("cooccur", cooccur),
        ("negdependency", negdependency),
        ("negdependencymixture", negdependencymixture),
        ("independent", independent),
        ("independentmixture", independentmixture),
        ("opposite", opposite),
        ("oppositemixture", oppositemixture),
        ("cooccur", cooccur),
        ("cooccurmixture", cooccurmixture),
    )
    for label, pair in report:
        print(label, rdc(pair[:, 0], pair[:, 1]))

    print(getIndependentGroups(independentmixture, 0.05, "poisson"))