コード例 #1
0
def get_CrossCatClient(client_type, **kwargs):
    """Instantiate the appropriate Engine and wrap it in a CrossCatClient.

    Parameters
    ----------
    client_type : str
        One of 'local', 'hadoop', 'jsonrpc', or 'multiprocessing'.
    **kwargs
        Forwarded verbatim to the selected engine's constructor.

    Returns
    -------
    CrossCatClient wrapping the constructed engine.

    Raises
    ------
    ValueError
        If client_type is not one of the recognized names.  (ValueError is a
        subclass of Exception, so existing `except Exception` callers still
        catch it.)
    """
    # Engines are imported lazily so that e.g. the hadoop dependencies are
    # only required when a hadoop client is actually requested.
    if client_type == 'local':
        import crosscat.LocalEngine as LocalEngine
        engine = LocalEngine.LocalEngine(**kwargs)
    elif client_type == 'hadoop':
        import crosscat.HadoopEngine as HadoopEngine
        engine = HadoopEngine.HadoopEngine(**kwargs)
    elif client_type == 'jsonrpc':
        import crosscat.JSONRPCEngine as JSONRPCEngine
        engine = JSONRPCEngine.JSONRPCEngine(**kwargs)
    elif client_type == 'multiprocessing':
        import crosscat.MultiprocessingEngine as MultiprocessingEngine
        engine = MultiprocessingEngine.MultiprocessingEngine(**kwargs)
    else:
        raise ValueError('unknown client_type: %s' % client_type)
    return CrossCatClient(engine)
コード例 #2
0
def quick_le(seed, n_chains=1):
    """Seed the RNGs, build a small factorial dataset, and initialize a
    LocalEngine on it.

    Returns (T, M_r, M_c, X_L, X_D, engine).
    """
    # Seed both the stdlib and numpy generators for reproducibility.
    random.seed(seed)
    numpy.random.seed(seed)
    data, row_meta, col_meta = du.gen_factorial_data_objects(
        seed, 2, N_COLS, N_ROWS, 2)
    local_engine = LE.LocalEngine(seed=seed)
    latents, partitions = local_engine.initialize(
        col_meta, row_meta, data, n_chains=n_chains)
    return data, row_meta, col_meta, latents, partitions, local_engine
コード例 #3
0
ファイル: server_jsonrpc.py プロジェクト: shafiahmed/crosscat
 def findmethod(self, method, args=None, kwargs=None):
     """Resolve *method* to a bound callable on a freshly seeded LocalEngine.

     Returns None when the name is not in self.methods.  args/kwargs are
     accepted for interface compatibility but not used here.
     """
     if method not in self.methods:
         return None
     # Each dispatch draws its own seed from the server's seed generator.
     seed = self.get_next_seed.next()
     return getattr(LE.LocalEngine(seed), method)
コード例 #4
0
ファイル: geweke_utils.py プロジェクト: yarden/crosscat
def _forward_sample_from_prior(inf_seed_and_n_samples, M_c, T,
        probe_columns=(0,),
        ROW_CRP_ALPHA_GRID=(), COLUMN_CRP_ALPHA_GRID=(),
        S_GRID=(), MU_GRID=(),
        N_GRID=default_n_grid,
        ):
    """Draw latent states from the prior and collect probe diagnostics.

    The first argument is a (seed, n_samples) pair so the function can be
    mapped over a list of work items.  Returns a defaultdict(list) of
    diagnostics accumulated across the samples.
    """
    inf_seed, n_samples = inf_seed_and_n_samples
    # Zero out the data: forward sampling must not condition on observed T,
    # only its shape is needed.
    T = numpy.zeros(numpy.array(T).shape).tolist()
    M_r = du.gen_M_r_from_T(T)
    engine = LE.LocalEngine(inf_seed)
    diagnostics_data = collections.defaultdict(list)
    funcs = None
    for _ in range(n_samples):
        X_L, X_D = engine.initialize(
            M_c, M_r, T,
            ROW_CRP_ALPHA_GRID=ROW_CRP_ALPHA_GRID,
            COLUMN_CRP_ALPHA_GRID=COLUMN_CRP_ALPHA_GRID,
            S_GRID=S_GRID,
            MU_GRID=MU_GRID,
            N_GRID=N_GRID,
        )
        # The diagnostics functions depend on the structure of X_L, so they
        # are built lazily from the first sample.
        if funcs is None:
            funcs = generate_diagnostics_funcs(X_L, probe_columns)
        diagnostics_data = collect_diagnostics(X_L, diagnostics_data, funcs)
    return diagnostics_data
コード例 #5
0
ファイル: geweke_utils.py プロジェクト: manderle01/crosscat
def run_posterior_chain(
        seed,
        M_c,
        T,
        num_iters,
        probe_columns=(0, ),
        ROW_CRP_ALPHA_GRID=(),
        COLUMN_CRP_ALPHA_GRID=(),
        S_GRID=(),
        MU_GRID=(),
        N_GRID=default_n_grid,
        CT_KERNEL=0,
        plot_rand_idx=None,
):
    """Run num_iters posterior transitions and collect probe diagnostics.

    Initializes a chain 'from_the_prior' and iterates
    run_posterior_chain_iter, accumulating diagnostics in a
    defaultdict(list).  At most one randomly chosen iteration
    (plot_rand_idx) is rendered with pu.plot_views.
    """
    plot_rand_idx = arbitrate_plot_rand_idx(plot_rand_idx, num_iters)
    engine = LE.LocalEngine(seed)
    M_r = du.gen_M_r_from_T(T)
    X_L, X_D = engine.initialize(
        M_c, M_r, T, 'from_the_prior',
        ROW_CRP_ALPHA_GRID=ROW_CRP_ALPHA_GRID,
        COLUMN_CRP_ALPHA_GRID=COLUMN_CRP_ALPHA_GRID,
        S_GRID=S_GRID,
        MU_GRID=MU_GRID,
        N_GRID=N_GRID)
    diagnostics_funcs = generate_diagnostics_funcs(X_L, probe_columns)
    diagnostics_data = collections.defaultdict(list)
    for i in range(num_iters):
        M_c, T, X_L, X_D = run_posterior_chain_iter(
            engine, M_c, T, X_L, X_D, diagnostics_data, diagnostics_funcs,
            ROW_CRP_ALPHA_GRID, COLUMN_CRP_ALPHA_GRID, S_GRID, MU_GRID,
            N_GRID=N_GRID,
            CT_KERNEL=CT_KERNEL)
        if i == plot_rand_idx:
            # This DOESN'T work with multithreading
            pu.plot_views(numpy.array(T), X_D, X_L, M_c,
                          filename='T_%s' % i,
                          dir='./',
                          close=True,
                          format=image_format)
    return diagnostics_data
コード例 #6
0
def run_mi_test_local(data_dict):
    """Run one mutual-information test case with a LocalEngine.

    data_dict supplies the generation seed ('SEED'), crosscat seed
    ('CCSEED'), dataset shape parameters, correlation, burn-in length, and
    bookkeeping fields ('id', 'dataset', 'sample').

    Returns dict(id=..., dataset=..., sample=..., mi=<average MI over all
    within-view column pairs, 0.0 if no view has two columns>).

    Removed from the original: several dead locals (an unused
    get_next_seed lambda, table_data, Linfoot/queries accumulators, a
    shadowed `MI = []` initializer) -- none affected the result.
    """
    gen_seed = data_dict['SEED']
    crosscat_seed = data_dict['CCSEED']
    num_clusters = data_dict['num_clusters']
    num_cols = data_dict['num_cols']
    num_rows = data_dict['num_rows']
    num_views = data_dict['num_views']
    corr = data_dict['corr']
    burn_in = data_dict['burn_in']
    mean_range = float(num_clusters) * 2.0

    # Preserve the original's module-level RNG seeding side effect.
    random.seed(gen_seed)

    # Generate a correlated synthetic state (the returned view assignment is
    # recomputed after burn-in below, so it is discarded here).
    T, M_c, M_r, X_L, X_D, _ = mitu.generate_correlated_state(
        num_rows, num_cols, num_views, num_clusters, mean_range, corr,
        seed=gen_seed)

    # Burn in the sampler.
    engine = LE.LocalEngine(crosscat_seed)
    X_L, X_D = engine.analyze(M_c, T, X_L, X_D, n_steps=burn_in)

    view_assignment = numpy.array(X_L['column_partition']['assignments'])

    # Average the estimated MI over every within-view pair of columns.
    n_views = max(view_assignment) + 1
    MI = 0.0
    pairs = 0.0
    for view in range(n_views):
        columns_in_view = numpy.nonzero(view_assignment == view)[0]
        for pair in itertools.combinations(columns_in_view, 2):
            MI_i, _ = iu.mutual_information(
                M_c, [X_L], [X_D], [pair], n_samples=1000)
            MI += MI_i[0][0]
            pairs += 1.0

    if pairs > 0.0:
        MI /= pairs

    ret_dict = dict(
        id=data_dict['id'],
        dataset=data_dict['dataset'],
        sample=data_dict['sample'],
        mi=MI,
        )

    return ret_dict
コード例 #7
0
ファイル: server_jsonrpc.py プロジェクト: jayelm/crosscat
 def wrapped(*args, **kwargs):
     """Invoke *method* on a fresh LocalEngine inside a child process.

     `seed`, `method`, and `putter` come from the enclosing scope; `putter`
     is expected to push the call's return value onto the queue.
     """
     engine = LE.LocalEngine(seed)
     bound_method = getattr(engine, method)
     #
     result_queue = Queue()
     worker = Process(target=functools.partial(putter, bound_method,
                                               result_queue),
                      args=args, kwargs=kwargs)
     worker.start()
     result = result_queue.get()
     worker.join()
     return result
コード例 #8
0
ファイル: test_col_ensure.py プロジェクト: wilsondy/crosscat
def quick_le(seed, n_chains=1):
    """Build a small factorial dataset and an initialized LocalEngine.

    All randomness is drawn from a private random.Random(seed) via
    get_next_seed, so the result is fully determined by *seed*.
    Returns (T, M_r, M_c, X_L, X_D, engine).
    """
    rng = random.Random(seed)
    data, row_meta, col_meta = du.gen_factorial_data_objects(
        get_next_seed(rng), 2, N_COLS, N_ROWS, 2)
    engine = LE.LocalEngine(seed=get_next_seed(rng))
    latents, partitions = engine.initialize(
        col_meta, row_meta, data,
        seed=get_next_seed(rng),
        n_chains=n_chains)
    return data, row_meta, col_meta, latents, partitions, engine
コード例 #9
0
def initialize_helper(table_data, dict_in):
    """Initialize latent state for one table from a work-item dict.

    table_data supplies M_c, M_r, and T; dict_in supplies the
    'initialization' strategy and 'SEED'.  Returns dict(X_L=..., X_D=...).
    """
    M_c = table_data['M_c']
    M_r = table_data['M_r']
    T = table_data['T']
    engine = LE.LocalEngine(dict_in['SEED'])
    # NOTE: this engine.initialize variant also returns metadata, which is
    # discarded here -- only the latent state is propagated.
    _M_c_prime, _M_r_prime, X_L, X_D = engine.initialize(
        M_c, M_r, T, initialization=dict_in['initialization'])
    return dict(X_L=X_L, X_D=X_D)
コード例 #10
0
ファイル: main.py プロジェクト: LiuFang816/SALSTM_py_data
def run(stdin, stdout, stderr, argv):
    """Entry point for the bayeslite shell.

    Parses argv, opens (or creates in-memory) a bayesdb, registers a
    crosscat metamodel (multiprocessing when --jobs != 1, local otherwise),
    sources init files / -f files, and runs the interactive loop unless
    --batch.  Returns a process exit status: 0 on success, 1 on usage error.
    """
    args = parse_args(argv[1:])
    progname = argv[0]
    # Strip any leading directory from the program name.  BUG FIX: compare
    # against -1 explicitly -- str.rfind returns -1 (truthy!) when there is
    # no slash and 0 (falsy!) when the slash is the first character, so the
    # original `if slash:` kept the leading '/' of names like '/foo'.
    slash = progname.rfind('/')
    if slash != -1:
        progname = progname[slash + 1:]
    if args.bdbpath is None and not args.memory:
        stderr.write('%s: pass filename or -m/--memory\n' % (progname, ))
        return 1
    if args.bdbpath == '-':
        stderr.write('%s: missing option?\n' % (progname, ))
        return 1
    bdb = bayeslite.bayesdb_open(pathname=args.bdbpath,
                                 builtin_metamodels=False)

    if args.jobs != 1:
        import crosscat.MultiprocessingEngine as ccme
        # args.jobs <= 0 means "let the engine pick the cpu count".
        jobs = args.jobs if args.jobs > 0 else None
        crosscat = ccme.MultiprocessingEngine(seed=args.seed, cpu_count=jobs)
    else:
        import crosscat.LocalEngine as ccle
        crosscat = ccle.LocalEngine(seed=args.seed)
    metamodel = CrosscatMetamodel(crosscat)
    bayeslite.bayesdb_register_metamodel(bdb, metamodel)
    bdbshell = shell.Shell(bdb, 'crosscat', stdin, stdout, stderr)
    with hook.set_current_shell(bdbshell):
        if not args.no_init_file:
            # Source ~/.bayesliterc if present.
            init_file = os.path.join(os.path.expanduser('~/.bayesliterc'))
            if os.path.isfile(init_file):
                bdbshell.dot_read(init_file)

        if args.file is not None:
            for path in args.file:
                if os.path.isfile(path):
                    bdbshell.dot_read(path)
                else:
                    bdbshell.stdout.write('%s is not a file.  Aborting.\n' %
                                          (str(path), ))
                    break

        if not args.batch:
            bdbshell.cmdloop()
    return 0
コード例 #11
0
def analyze_helper(table_data, dict_in):
    """Run MCMC transitions (engine.analyze) for one work item.

    table_data supplies M_c and T; dict_in supplies the current latent state
    (X_L, X_D), the transition kernels and step count, the column/row
    subsets (c, r), and the seed.  Returns dict(X_L=..., X_D=...) holding
    the POST-analysis state.
    """
    M_c = table_data['M_c']
    T = table_data['T']
    X_L = dict_in['X_L']
    X_D = dict_in['X_D']
    kernel_list = dict_in['kernel_list']
    n_steps = dict_in['n_steps']
    c = dict_in['c']
    r = dict_in['r']
    SEED = dict_in['SEED']
    engine = LE.LocalEngine(SEED)
    X_L_prime, X_D_prime = engine.analyze(M_c,
                                          T,
                                          X_L,
                                          X_D,
                                          kernel_list=kernel_list,
                                          n_steps=n_steps,
                                          c=c,
                                          r=r)
    # BUG FIX: return the analyzed state.  The original returned the
    # unmodified inputs (X_L, X_D), silently discarding the work done by
    # engine.analyze -- compare initialize_helper, which returns the
    # computed state.
    ret_dict = dict(X_L=X_L_prime, X_D=X_D_prime)
    return ret_dict
コード例 #12
0
def get_generative_clustering(M_c, M_r, T, data_inverse_permutation_indices,
                              num_clusters, num_views):
    """Construct (X_L, X_D) in the known generative configuration.

    NOTE: this function only works because State.p_State doesn't use
    column_component_suffstats.
    """
    num_rows = len(T)
    num_cols = len(T[0])
    # Rows are split evenly across clusters, then each view's assignment is
    # unscrambled via its inverse permutation.
    X_D_helper = numpy.repeat(range(num_clusters), (num_rows / num_clusters))
    gen_X_D = [X_D_helper[numpy.argsort(inverse_permutation)]
               for inverse_permutation in data_inverse_permutation_indices]
    gen_X_L_assignments = numpy.repeat(range(num_views),
                                       (num_cols / num_views))
    # Initialize only to obtain an X_L skeleton that can be overwritten.
    local_engine = LE.LocalEngine()
    bad_X_L, bad_X_D = local_engine.initialize(
        M_c, M_r, T, initialization='apart')
    bad_X_L['column_partition']['assignments'] = gen_X_L_assignments
    # Manually construct the state in the generative configuration.
    state = State.p_State(M_c, T, bad_X_L, gen_X_D)
    gen_X_L = state.get_X_L()
    gen_X_D = state.get_X_D()
    # One sweep of hyperparameter inference leaves the hyperparameters in a
    # reasonable state without disturbing the partitions.
    kernel_list = (
        'row_partition_hyperparameters',
        'column_hyperparameters',
        'column_partition_hyperparameter',
    )
    gen_X_L, gen_X_D = local_engine.analyze(
        M_c, T, gen_X_L, gen_X_D, n_steps=1, kernel_list=kernel_list)
    return gen_X_L, gen_X_D
コード例 #13
0
ファイル: test_col_ensure.py プロジェクト: wilsondy/crosscat
def test_two_dependent_one_dependent():
    """Check dependence constraints: (0,1) dependent, (1,2) independent.

    These dependency constraints target the computation of
    unorm_crp_logps_avg in the case that one variable (1) is independent of
    another variable (2), which should ensure the CRP probability of the
    view of (2) is -INFINITY when doing a block proposal of (0,1) into the
    view of (2).  Refer to the comment about computing the CRP
    probabilities in State::sample_insert_features.
    """
    rng = random.Random(PASS_SEED)
    # NB: seeds are drawn in the same order as the original, so the chain
    # of RNG draws (and hence the test) is unchanged.
    T, M_r, M_c = du.gen_factorial_data_objects(
        get_next_seed(rng), 2, 3, 10, 1)
    engine = LE.LocalEngine(seed=get_next_seed(rng))

    dep_constraints = [(0, 1, True), (1, 2, False)]

    X_L, X_D = engine.initialize(
        M_c, M_r, T, seed=get_next_seed(rng), n_chains=1)
    X_L, X_D = engine.ensure_col_dep_constraints(
        M_c, M_r, T, X_L, X_D, dep_constraints, get_next_seed(rng))
    for first_col, second_col, dep in dep_constraints:
        assert engine.assert_col_dep_constraints(
            X_L, X_D, first_col, second_col, dep, True)

    X_L, X_D = engine.analyze(
        M_c, T, X_L, X_D, get_next_seed(rng), n_steps=1000)
    for first_col, second_col, dep in dep_constraints:
        assert engine.assert_col_dep_constraints(
            X_L, X_D, first_col, second_col, dep, True)
コード例 #14
0
# Persist the sampled latent states so a later run can reload them instead
# of re-running inference.
to_pickle = dict(X_L_list=X_L_list, X_D_list=X_D_list)
fu.pickle(to_pickle, pkl_filename)

# to_pickle = fu.unpickle(pkl_filename)
# X_L_list = to_pickle['X_L_list']
# X_D_list = to_pickle['X_D_list']

# can we recreate a row given some of its values?
query_cols = [2, 6, 9]
query_names = col_names[query_cols]
Q = determine_Q(M_c, query_names, num_rows)
#
condition_cols = [3, 4, 10]
condition_names = col_names[condition_cols]
samples_list = []
engine = LE.LocalEngine(inf_seed)
for actual_row_idx in [1, 10, 100]:
    # Condition on the row's true values in the condition columns ...
    actual_row_values = T[actual_row_idx]
    condition_values = [
        actual_row_values[condition_col] for condition_col in condition_cols
    ]
    condition_tuples = zip(condition_names, condition_values)
    Y = determine_unobserved_Y(num_rows, M_c, condition_tuples)
    # ... and draw 10 predictive samples of the query columns.
    samples = engine.simple_predictive_sample(M_c, X_L_list, X_D_list, Y, Q,
                                              10)
    samples_list.append(samples)

round_1 = lambda value: round(value, 2)
# impute some values (as if they were missing)
for impute_row in [10, 20, 30, 40, 50, 60, 70, 80]:
    # NOTE(review): the body of this loop appears truncated at the end of
    # this chunk; the imputation presumably continues below.
    impute_cols = [31, 32, 52, 60, 62]
コード例 #15
0
def do_analyze((chain_tuple, seed)):
    # Run num_transitions MCMC steps on one chain.  The tuple parameter in
    # the signature is Python 2-only syntax (removed in Python 3); the
    # single argument makes this mappable over (chain, seed) work items.
    # M_c, T, and num_transitions are read from the enclosing scope.
    (X_L, X_D) = chain_tuple
    engine = LE.LocalEngine(seed)
    X_L, X_D = engine.analyze(M_c, T, X_L, X_D, n_steps=num_transitions)
    return X_L, X_D
コード例 #16
0
def do_initialize(seed):
    """Initialize one chain's latent state (X_L, X_D) from *seed*.

    M_c, M_r, and T are read from the enclosing scope.
    """
    local_engine = LE.LocalEngine(seed)
    return local_engine.initialize(M_c, M_r, T)