Esempio n. 1
0
            def initialize():
                """Build an ``RCITester`` over freshly generated, seeded data.

                Every seeded step uses the same value, ``idx + 1``. ``idx``,
                ``schema``, ``sizing_method``, ``base_size``, ``rcm`` and ``n_jobs``
                are all read from the enclosing scope.

                Returns:
                    The ``RCITester`` wrapping an ``RBFKernelComputer`` built on the
                    generated skeleton.
                """
                run_seed = idx + 1
                np.random.seed(run_seed)

                # The skeleton size is whatever sizing_method derives from base_size.
                target_size = sizing_method(base_size, schema)
                skeleton = sized_random_skeleton(schema, target_size, seed=run_seed)

                # Build the linear-Gaussian model from rcm, then generate values
                # for the skeleton with it.
                generate_values_for_skeleton(
                    linear_gaussians_rcm(rcm, seed=run_seed), skeleton, seed=run_seed)

                kernel_computer = RBFKernelComputer(
                    DataCenter(skeleton),
                    additive=1e-2,
                    n_jobs=n_jobs,
                    eqsize_only=False,
                    k_cache_max_size=128,
                )
                return RCITester(kernel_computer, n_jobs=n_jobs)
Esempio n. 2
0
def test_company():
    """Smoke-test value generation and normalization on the company example.

    Creates one relational variable per schema attribute (rooted at the
    attribute's own item class), gives each a linear-Gaussian function with
    coefficient 1.0 on every parent, then generates values for a skeleton and
    normalizes it. There are no assertions: the test passes if nothing raises.
    """
    schema = company_schema()
    base_rcm = company_rcm()
    model_schema = base_rcm.schema

    targets = {
        RelationalVariable(RelationalPath(model_schema.item_class_of(attr)), attr)
        for attr in model_schema.attrs
    }
    skeleton = generate_skeleton(schema, 400, max_degree=2)

    # Same functional form for every variable: unit weights, averaged parents,
    # noise drawn from normal_sampler(0, 0.3).
    functions = {
        target: linear_gaussian(
            {cause: 1.0 for cause in base_rcm.pa(target)},
            average_agg(),
            normal_sampler(0, 0.3),
        )
        for target in targets
    }

    param_rcm = ParamRCM(model_schema, base_rcm.directed_dependencies, functions)
    generate_values_for_skeleton(param_rcm, skeleton)
    normalize_skeleton(skeleton)
Esempio n. 3
0
def generate_values(independent, mu, sd, skeleton, slope, seed=None):
    """Generate values for ``skeleton`` from a fixed model over X, Y, U and V.

    The model structure has three dependencies::

        [A, AC, C].U --> [A].X
        [B, BD, D].V --> [B].Y
        [B, AB, A].X --> [B].Y

    The item classes, attributes and ``schema`` are read from the enclosing
    module scope.

    Args:
        independent: When truthy, the coefficient of X in the function for Y
            is forced to 0.0; the X --> Y dependency stays in the structure.
        mu: First argument of ``normal_sampler`` for U and V (presumably the
            mean); X and Y use 0 instead.
        sd: Second argument of every ``normal_sampler`` (presumably the
            standard deviation).
        skeleton: Skeleton handed to ``generate_values_for_skeleton``.
        slope: Coefficient of X in the function for Y when not ``independent``.
        seed: If not None, seeds NumPy's global RNG before anything else.
    """
    if seed is not None:
        np.random.seed(seed)

    x_coefficient = 0.0 if independent else slope

    # Variables viewed from their own item class.
    own_x = RelationalVariable(RelationalPath(A), X)
    own_y = RelationalVariable(RelationalPath(B), Y)
    own_u = RelationalVariable(RelationalPath(C), U)
    own_v = RelationalVariable(RelationalPath(D), V)

    # The same attributes viewed from the item class of the effect they drive.
    u_seen_from_a = RelationalVariable(RelationalPath([A, AC, C]), U)
    v_seen_from_b = RelationalVariable(RelationalPath([B, BD, D]), V)
    x_seen_from_b = RelationalVariable(RelationalPath([B, AB, A]), X)

    dependencies = {
        RelationalDependency(u_seen_from_a, own_x),
        RelationalDependency(v_seen_from_b, own_y),
        RelationalDependency(x_seen_from_b, own_y),  # X-->Y
    }
    rcm = RCM(schema, dependencies)

    functions = {
        # U and V have no parents: they are pure noise around mu.
        own_u: linear_gaussian({}, average_agg(), normal_sampler(mu, sd)),
        own_v: linear_gaussian({}, average_agg(), normal_sampler(mu, sd)),
        own_x: linear_gaussian(
            {u_seen_from_a: 1.0}, sum_agg(), normal_sampler(0, sd)),
        own_y: linear_gaussian(
            {v_seen_from_b: 1.0, x_seen_from_b: x_coefficient},
            sum_agg(), normal_sampler(0, sd)),
    }

    # Parametrize the structure and generate values for the skeleton.
    param_rcm = ParamRCM(rcm.schema, rcm.directed_dependencies, functions)
    generate_values_for_skeleton(param_rcm, skeleton)
def generate_values(seed, schema, skeleton, null_hypothesis=True, mu=0.0,
                    sd=0.1):
    """Pick a structure over X, Y, Z, generate values, and return test variables.

    Under the null hypothesis the structure is a chain (X --> Z --> Y) or a
    fork (X <-- Z --> Y); otherwise it is the collider (X --> Z <-- Y). Every
    arrow is realised by a relational path of at most 2 hops chosen with
    ``pick_one``.

    Args:
        seed: If not None, seeds NumPy's global RNG first.
        schema: Schema exposing 'A', 'B', 'C', 'X', 'Y', 'Z', 'R_AB', 'R_AC'
            and 'R_BC' by key, plus ``item_class_of``.
        skeleton: Skeleton handed to ``generate_values_for_skeleton``.
        null_hypothesis: Selects which family of structures is drawn from and
            which variables are returned.
        mu: First argument of each ``normal_sampler``; per the original
            author's note it acts as a bias on top of zero-mean noise.
        sd: Second argument of each ``normal_sampler``.

    Returns:
        Tuple ``(U, V, W, rcm, param_rcm)``. Under the null hypothesis
        ``U = [B, R_BC, C].X``, ``V = [B].Y`` and ``W = [B, R_AB, A].Z``.
        Otherwise ``U = [A, R_AB, B].Y``, ``V = [A, R_AC, C].X`` and
        ``W = [A].Z``.
    """
    if seed is not None:
        np.random.seed(seed)

    A, Z = schema['A'], schema['Z']
    B, Y = schema['B'], schema['Y']
    C, X = schema['C'], schema['X']
    R_AB, R_AC, R_BC = schema['R_AB'], schema['R_AC'], schema['R_BC']

    # Two hops here (entity - relationship - entity) equal one hop in an
    # entity-only graph.
    paths_by_base = {
        entity: list(enumerate_rpaths(schema, 2, entity))
        for entity in [A, B, C]
    }

    # Each candidate structure is a list of (cause attribute, effect attribute).
    if null_hypothesis:
        # X _||_ Y | Z holds: chain or fork through Z.
        candidate_structures = [[(X, Z), (Z, Y)], [(Z, X), (Z, Y)]]
    else:
        # X _||_ Y | Z fails: Z is a collider.
        candidate_structures = [[(X, Z), (Y, Z)]]

    # Model structure: one relational dependency per arrow.
    dependencies = []
    for cause_attr, effect_attr in pick_one(candidate_structures):
        cause_class = schema.item_class_of(cause_attr)
        effect_class = schema.item_class_of(effect_attr)

        # Paths that start at the effect's class and end at the cause's class.
        reaching_cause = [
            path for path in paths_by_base[effect_class]
            if path.terminal == cause_class
        ]
        chosen_path = pick_one(reaching_cause)

        dependencies.append(
            RelationalDependency(
                RelationalVariable(chosen_path, cause_attr),
                RelationalVariable(RelationalPath(effect_class), effect_attr)))

    rcm = RCM(schema, set(dependencies))

    # Model functions: unit weight on every parent, summed, plus sampler noise.
    functions = {
        rvar: linear_gaussian(
            {parent: 1 for parent in rcm.pa(rvar)},
            sum_agg(),
            normal_sampler(mu, sd))
        for rvar in enumerate_rvars(schema, 0)
    }

    param_rcm = ParamRCM(rcm.schema, rcm.directed_dependencies, functions)
    generate_values_for_skeleton(param_rcm, skeleton)

    if null_hypothesis:
        # Everything is seen from B, where Y is canonical.
        U = RelationalVariable(RelationalPath([B, R_BC, C]), X)
        V = RelationalVariable(RelationalPath(B), Y)
        W = RelationalVariable(RelationalPath([B, R_AB, A]), Z)
    else:
        # Everything is seen from A, where Z is canonical.
        U = RelationalVariable(RelationalPath([A, R_AB, B]), Y)
        V = RelationalVariable(RelationalPath([A, R_AC, C]), X)
        W = RelationalVariable(RelationalPath(A), Z)

    return U, V, W, rcm, param_rcm