Exemple #1
0
def test_corr_multiple():
    """Check `corr()` when one or both of its arguments span several columns."""
    # Reference coefficients for the frame below (column C equals 3 - D).
    r_ab = -0.07168504827326534
    r_ac = 0.07559289460184544
    r_bc = 0.7207110797203374

    frame = dt.Frame(
        A=[3, 5, 9, 1],
        B=[4, 7, 0, 0],
        C=[3, 2, 1, 0],
        D=range(4),
    )
    first_vs_all = frame[:, corr(f.A, f[:])]
    all_vs_last = frame[:, corr(f[:], f.D)]
    all_vs_all = frame[:, corr(f[:], f[:])]

    # Column A against each of A, B, C, D.
    assert_equals(first_vs_all, dt.Frame([[1.0], [r_ab], [r_ac], [-r_ac]]))
    # Each of A, B, C, D against column D.
    assert_equals(all_vs_last, dt.Frame([[-r_ac], [-r_bc], [-1.0], [1.0]]))
    # With f[:] on both sides every expected value is 1.0.
    assert_equals(all_vs_all, dt.Frame([[1.0]] * 4))
Exemple #2
0
def test_corr_random(numpy, seed):
    """Compare `corr()` with `numpy.corrcoef` on two random 100-element arrays.

    `numpy` and `seed` are supplied by the caller (presumably pytest
    fixtures -- confirm against the test suite's conftest).
    """
    numpy.random.seed(seed)
    # Draw order matters for reproducibility: first array, then second.
    xs = numpy.random.rand(100)
    ys = numpy.random.rand(100)

    # Off-diagonal entry of the 2x2 correlation matrix is the reference value.
    expected = numpy.corrcoef(xs, ys)[0, 1]

    frame = dt.Frame([xs, ys])
    correlation = frame[:, corr(f[0], f[1])]
    actual = correlation[0, 0]

    assert numpy.isclose(expected, actual, atol=1e-12, rtol=1e-12)
Exemple #3
0
def test_corr_with_constant():
    """`corr()` of a varying column with a constant column should equal a NaN frame."""
    nrows = 23
    frame = dt.Frame(A=range(nrows), B=[2.5] * nrows)
    result = frame[:, corr(f.A, f.B)]
    expected = dt.Frame([math.nan])
    assert_equals(result, expected)
Exemple #4
0
def test_corr_small_frame():
    """`corr()` on a one-row frame and on an empty frame should give a single NA."""
    one_row = dt.Frame(A=[1], B=[2])
    no_rows = dt.Frame(A=[], B=[])

    from_one_row = one_row[:, corr(f.A, f.B)]
    from_no_rows = no_rows[:, corr(f.A, f.B)]

    # Both results are compared against a 1x1 float64 frame holding None.
    assert_equals(from_one_row, dt.Frame([None], stype=dt.float64))
    assert_equals(from_no_rows, dt.Frame([None], stype=dt.float64))
Exemple #5
0
def test_corr_simple2():
    """An increasing column against a decreasing one should correlate at -1.0."""
    ascending = range(5)
    descending = range(5, 0, -1)
    frame = dt.Frame(A=ascending, B=descending)

    result = frame[:, corr(f.A, f.B)]
    expected = dt.Frame([-1.0])
    assert_equals(result, expected)
Exemple #6
0
          fun=fun,
          run=2,
          time_sec=t,
          mem_gb=m,
          cache=cache,
          chk=make_chk(flatten(chk.to_list())),
          chk_time_sec=chkt,
          on_disk=on_disk)
# Show the first and last three rows of the previous answer, then drop the
# reference to it before the next question runs.
print(ans.head(3), flush=True)
print(ans.tail(3), flush=True)
del ans

question = 'regression v1 v2 by id2 id4'  # q9
# Run a garbage collection pass before the timer starts.
gc.collect()
t_start = timeit.default_timer()
# Squared corr(v1, v2) for each (id2, id4) group, stored under the name 'r2'.
ans = x[:, {'r2': corr(f.v1, f.v2)**2}, by(f.id2, f.id4)]
# The shape is printed before the timer is read, so this print is part of the
# measured time. NOTE(review): presumably intentional -- confirm.
print(ans.shape, flush=True)
t = timeit.default_timer() - t_start
m = memory_usage()
# The checksum (sum over the r2 column) is timed separately from the query.
# NOTE(review): `sum` is presumably datatable's reducer, not the builtin -- confirm imports.
t_start = timeit.default_timer()
chk = ans[:, sum(f.r2)]
chkt = timeit.default_timer() - t_start
write_log(task=task,
          data=data_name,
          in_rows=x.shape[0],
          question=question,
          out_rows=ans.shape[0],
          out_cols=ans.shape[1],
          solution=solution,
          version=ver,
          git=git,
Exemple #7
0
def test_corr_with_constant():
    """`corr()` of a varying column with a constant column should be a float NA."""
    nrows = 23
    frame = dt.Frame(A=range(nrows), B=[2.5] * nrows)
    result = frame[:, corr(f.A, f.B)]
    expected = dt.Frame([None], type=float)
    assert_equals(result, expected)
Exemple #8
0
# Take rows [:2] of v3 within each id6 group, ordered by -v3, under the name
# "largest2_v3". The timer (`t_start`) was started before this fragment.
ans = x[:2, {"largest2_v3": f.v3}, by(f.id6), sort(-f.v3)]
# The shape is printed before the timer is read, so this print is part of the
# measured time. NOTE(review): presumably intentional -- confirm.
print(ans.shape, flush=True)
t = timeit.default_timer() - t_start
m = memory_usage()
# The checksum (sum over largest2_v3) is timed separately from the query.
t_start = timeit.default_timer()
chk = ans[:, sum(f.largest2_v3)]
chkt = timeit.default_timer() - t_start
# Record this measurement as run 2 of the question.
write_log(task=task, data=data_name, in_rows=x.shape[0], question=question, out_rows=ans.shape[0], out_cols=ans.shape[1], solution=solution, version=ver, git=git, fun=fun, run=2, time_sec=t, mem_gb=m, cache=cache, chk=make_chk(flatten(chk.to_list())), chk_time_sec=chkt, on_disk=on_disk)
# Show the first and last three rows (converted to pandas), then drop the answer.
print(ans.head(3).to_pandas(), flush=True)
print(ans.tail(3).to_pandas(), flush=True)
del ans

# NOTE(review): the trailing remark says "not yet implemented", yet the code
# below already calls corr() -- the remark looks stale; confirm and remove.
question = "regression v1 v2 by id2 id4" # q9 # not yet implemented https://github.com/h2oai/datatable/issues/1543
# --- run 1 ---
# Run a garbage collection pass before the timer starts.
gc.collect()
t_start = timeit.default_timer()
# Squared corr(v1, v2) for each (id2, id4) group, stored under the name "r2".
ans = x[:, {"r2": corr(f.v1, f.v2)**2}, by(f.id2, f.id4)]
# The shape is printed before the timer is read, so this print is part of the
# measured time. NOTE(review): presumably intentional -- confirm.
print(ans.shape, flush=True)
t = timeit.default_timer() - t_start
m = memory_usage()
# The checksum (sum over the r2 column) is timed separately from the query.
t_start = timeit.default_timer()
chk = ans[:, sum(f.r2)]
chkt = timeit.default_timer() - t_start
# Record this measurement as run 1 of the question.
write_log(task=task, data=data_name, in_rows=x.shape[0], question=question, out_rows=ans.shape[0], out_cols=ans.shape[1], solution=solution, version=ver, git=git, fun=fun, run=1, time_sec=t, mem_gb=m, cache=cache, chk=make_chk(flatten(chk.to_list())), chk_time_sec=chkt, on_disk=on_disk)
# Drop the first answer and collect garbage before repeating the query.
del ans
gc.collect()
# --- second execution of the same query (the rest of it lies beyond this fragment) ---
t_start = timeit.default_timer()
ans = x[:, {"r2": corr(f.v1, f.v2)**2}, by(f.id2, f.id4)]
print(ans.shape, flush=True)
t = timeit.default_timer() - t_start
m = memory_usage()
t_start = timeit.default_timer()