Example #1
0
def analyze_helper(table_data, data_dict, command_dict):
    M_c = table_data['M_c']
    T = table_data['T']
    SEED = data_dict['SEED']
    X_L = data_dict['X_L']
    X_D = data_dict['X_D']
    kernel_list = command_dict['kernel_list']
    n_steps = command_dict['n_steps']
    c = command_dict['c']
    r = command_dict['r']
    max_time = command_dict['max_time']
    engine = LE.LocalEngine(SEED)
    X_L_prime, X_D_prime = engine.analyze(M_c,
                                          T,
                                          X_L,
                                          X_D,
                                          kernel_list=kernel_list,
                                          n_steps=n_steps,
                                          c=c,
                                          r=r,
                                          max_time=max_time)
    SEED = engine.get_next_seed()
    #
    ret_dict = dict(SEED=SEED, X_L=X_L_prime, X_D=X_D_prime)
    return ret_dict
def mi_analyze_helper(table_data, data_dict, command_dict):

    gen_seed = data_dict['SEED']
    crosscat_seed = data_dict['CCSEED']
    num_clusters = data_dict['num_clusters']
    num_cols = data_dict['num_cols']
    num_rows = data_dict['num_rows']
    num_views = data_dict['num_views']
    corr = data_dict['corr']
    burn_in = data_dict['burn_in']
    mean_range = float(num_clusters)*2.0

    # 32 bit signed int
    random.seed(gen_seed)
    get_next_seed = lambda : random.randrange(2147483647)

    # generate the stats
    T, M_c, M_r, X_L, X_D, view_assignment = mitu.generate_correlated_state(num_rows,
        num_cols, num_views, num_clusters, mean_range, corr, seed=gen_seed);

    table_data = dict(T=T,M_c=M_c)

    engine = LE.LocalEngine(crosscat_seed)
    X_L_prime, X_D_prime = engine.analyze(M_c, T, X_L, X_D, n_steps=burn_in) 

    X_L = X_L_prime
    X_D = X_D_prime

    view_assignment = numpy.array(X_L['column_partition']['assignments'])
 
    # for each view calclate the average MI between all pairs of columns
    n_views = max(view_assignment)+1
    MI = []
    Linfoot = []
    queries = []
    MI = 0.0
    pairs = 0.0
    for view in range(n_views):
        columns_in_view = numpy.nonzero(view_assignment==view)[0]
        combinations = itertools.combinations(columns_in_view,2)
        for pair in combinations:
            any_pairs = True
            queries.append(pair)
            MI_i, Linfoot_i = iu.mutual_information(M_c, [X_L], [X_D], [pair], n_samples=1000)
            MI += MI_i[0][0]
            pairs += 1.0

    
    if pairs > 0.0:
        MI /= pairs

    ret_dict = dict(
        id=data_dict['id'],
        dataset=data_dict['dataset'],
        sample=data_dict['sample'],
        mi=MI,
        )

    return ret_dict
Example #3
0
def initialize_helper(table_data, data_dict, command_dict):
    M_c = table_data['M_c']
    M_r = table_data['M_r']
    T = table_data['T']
    SEED = data_dict['SEED']
    initialization = command_dict['initialization']
    engine = LE.LocalEngine(SEED)
    X_L, X_D = engine.initialize(M_c, M_r, T, initialization=initialization)
    SEED = engine.get_next_seed()
    #
    ret_dict = dict(SEED=SEED, X_L=X_L, X_D=X_D)
    return ret_dict
Example #4
0
def convergence_analyze_helper(table_data, data_dict, command_dict):
    gen_seed = data_dict['SEED']
    num_clusters = data_dict['num_clusters']
    num_cols = data_dict['num_cols']
    num_rows = data_dict['num_rows']
    num_views = data_dict['num_views']
    max_mean = data_dict['max_mean']
    n_test = data_dict['n_test']
    num_transitions = data_dict['n_steps']
    block_size = data_dict['block_size']
    init_seed = data_dict['init_seed']

    # generate some data
    T, M_r, M_c, data_inverse_permutation_indices = \
            du.gen_factorial_data_objects(gen_seed, num_clusters,
                    num_cols, num_rows, num_views,
                    max_mean=max_mean, max_std=1,
                    send_data_inverse_permutation_indices=True)
    view_assignment_ground_truth = \
            ctu.determine_synthetic_column_ground_truth_assignments(num_cols,
                    num_views)
    X_L_gen, X_D_gen = ttu.get_generative_clustering(
        M_c, M_r, T, data_inverse_permutation_indices, num_clusters, num_views)
    T_test = ctu.create_test_set(M_c, T, X_L_gen, X_D_gen, n_test, seed_seed=0)
    generative_mean_test_log_likelihood = \
            ctu.calc_mean_test_log_likelihood(M_c, T, X_L_gen, X_D_gen, T_test)

    # additional set up
    engine = LE.LocalEngine(init_seed)
    column_ari_list = []
    mean_test_ll_list = []
    elapsed_seconds_list = []

    # get initial ARI, test_ll
    with gu.Timer('initialize', verbose=False) as timer:
        X_L, X_D = engine.initialize(M_c,
                                     M_r,
                                     T,
                                     initialization='from_the_prior')
    column_ari = ctu.get_column_ARI(X_L, view_assignment_ground_truth)
    column_ari_list.append(column_ari)
    mean_test_ll = ctu.calc_mean_test_log_likelihood(M_c, T, X_L, X_D, T_test)
    mean_test_ll_list.append(mean_test_ll)
    elapsed_seconds_list.append(timer.elapsed_secs)

    # run blocks of transitions, recording ARI, test_ll progression
    completed_transitions = 0
    n_steps = min(block_size, num_transitions)
    while (completed_transitions < num_transitions):
        # We won't be limiting by time in the convergence runs
        with gu.Timer('initialize', verbose=False) as timer:
            X_L, X_D = engine.analyze(M_c,
                                      T,
                                      X_L,
                                      X_D,
                                      kernel_list=(),
                                      n_steps=n_steps,
                                      max_time=-1)
        completed_transitions = completed_transitions + block_size
        #
        column_ari = ctu.get_column_ARI(X_L, view_assignment_ground_truth)
        column_ari_list.append(column_ari)
        mean_test_ll = ctu.calc_mean_test_log_likelihood(
            M_c, T, X_L, X_D, T_test)
        mean_test_ll_list.append(mean_test_ll)
        elapsed_seconds_list.append(timer.elapsed_secs)

    ret_dict = dict(
        num_rows=num_rows,
        num_cols=num_cols,
        num_views=num_views,
        num_clusters=num_clusters,
        max_mean=max_mean,
        column_ari_list=column_ari_list,
        mean_test_ll_list=mean_test_ll_list,
        generative_mean_test_log_likelihood=generative_mean_test_log_likelihood,
        elapsed_seconds_list=elapsed_seconds_list,
        n_steps=num_transitions,
        block_size=block_size,
    )
    return ret_dict