Example #1
def test_simple_nonconj():
    rng = irm.RNG()
    irm_model = irmio.create_model_from_data(data_simple_nonconj, rng=rng)
    
    irmio.set_model_latent(irm_model, latent_simple_nonconj, rng=rng)
    
    a = irm_model.domains['d1'].get_assignments()
    axes = irm_model.relations['R1'].get_axes()
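    # for each axis of R1, pair the domain object with that domain's position for the relation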
    axes_objs = [(irm_model.domains[dn], irm_model.domains[dn].get_relation_pos('R1')) 
                 for dn in axes]

    comps = model.get_components_in_relation(axes_objs,
                                             irm_model.relations['R1'])

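    # group labels of entities 0, 2 and 4 under the latent assignment we just set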
    g0 = a[0]
    g1 = a[2]
    g2 = a[4]

    assert_approx_equal(comps[g0, g0]['p'], 0.0)
    assert_approx_equal(comps[g0, g1]['p'], 0.01)
    assert_approx_equal(comps[g0, g2]['p'], 0.02)

    assert_approx_equal(comps[g1, g0]['p'], 0.1)
    assert_approx_equal(comps[g1, g1]['p'], 0.11)
    assert_approx_equal(comps[g1, g2]['p'], 0.12)


    assert_approx_equal(comps[g2, g0]['p'], 0.2)
    assert_approx_equal(comps[g2, g1]['p'], 0.21)
    assert_approx_equal(comps[g2, g2]['p'], 0.22)
Example #2
def test_mixture():
    N = 100

    np.random.seed(0)

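    # synthetic data: first half drawn from N(-4, 1), second half from N(4, 1), then shuffled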
    d = np.zeros(N, dtype=np.float32)
    for i in range(N // 2):
        d[i] = np.random.normal(-4, 1)
        d[i + N // 2] = np.random.normal(4, 1)

    d = np.random.permutation(d)

    desc = {'f1': {'data': d, 'model': 'NormalInverseChiSq'}}

    latent, data = connattribio.create_mm(desc)
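    # initialize the assignment with 10 round-robin groups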
    latent['domains']['d1']['assignment'] = np.arange(N) % 10

    latent, data = irm.data.synth.prior_generate(latent, data)

    rng = irm.RNG()
    irm_model = irm.irmio.create_model_from_data(data, rng=rng)
    irm.irmio.set_model_latent(irm_model, latent, rng)

    kernel_config = irm.runner.default_kernel_anneal()

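    # run 200 inference sweeps, printing the group occupancy counts and relation hyperparameters each time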
    for i in range(200):
        irm.runner.do_inference(irm_model, rng, kernel_config, i)

        new_latent = irm.irmio.get_latent(irm_model)
        a = new_latent['domains']['d1']['assignment']

        print irm.util.assign_to_counts(a)
        print new_latent['relations']['r_f1']['hps']
Example #3
def run_bbconj(infilename, outfilename, seed):
    ITERS = SAMPLER_ITERS

    np.random.seed(seed)

    indata = pickle.load(open(infilename, 'r'))

    model_name = "BetaBernoulli"
    kc = irm.runner.default_kernel_config()

    data = indata['connectivity']

    irm_config = irm.irmio.default_graph_init(data, model_name)

    rng = irm.RNG()
    model = irm.irmio.model_from_config(irm_config, init='crp', rng=rng)

    scores = []
    states = []
    comps = []
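    # run the sampler, recording the total score and the t1 assignment after every iteration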
    for i in range(ITERS):
        print "iteration", i
        irm.runner.do_inference(model, rng, kc)
        a = model.domains['t1'].get_assignments()

        scores.append(model.total_score())
        states.append(a)

    pickle.dump({
        'scores': scores,
        'states': states,
        'infile': infilename
    }, open(outfilename, 'w'))
Example #4
def test_io_score_t1t2():

    rng = irm.RNG()

    for D1_N, D2_N in [(10, 20), (20, 30), (200, 300)]:
        for model_name in ["BetaBernoulliNonConj", 
                           "LogisticDistance", 
                           "LinearDistance"]: 

            d = {'domains' : {'d1' : {'N' : D1_N}, 
                              'd2' : {'N' : D2_N}},
                    'relations' : {'R1' : {'relation' : ('d1', 'd2'), 
                                           'model' : model_name}}}

            l = {}

            new_latent, new_data = data.synth.prior_generate(l, d)

            irm_model = irmio.create_model_from_data(new_data, rng=rng)
            irmio.set_model_latent(irm_model, new_latent, rng=rng)

            s1 = irm_model.total_score()

            extracted_latent = irmio.get_latent(irm_model)

            irm_model2 = irmio.create_model_from_data(new_data, rng=rng)
            irmio.set_model_latent(irm_model2, extracted_latent, rng=rng)

            s2 = irm_model2.total_score()
            np.testing.assert_approx_equal(s1, s2, 5)
Example #5
def create_truth_bb(dbfile, outfiles):
    conn = sqlite3.connect(dbfile)
    for THOLD_i, outfile in zip(THOLDS, outfiles):
        cells, conn_mat, dist_mats = preprocess.create_data(
            conn, process.THOLDS[THOLD_i])

        irm_latent, irm_data = irm.irmio.default_graph_init(
            conn_mat, 'BetaBernoulliNonConj')

        irm_latent['relations']['R1']['hps'] = {'alpha': 1.0, 'beta': 1.0}

        irm_latent['domains']['d1'][
            'assignment'] = irm.util.canonicalize_assignment(cells['type_id'])

        irm_model = irm.irmio.create_model_from_data(irm_data)
        rng = irm.RNG()
        irm.irmio.set_model_latent(irm_model, irm_latent, rng)
        irm.irmio.estimate_suffstats(irm_model, rng, ITERS=40)

        learned_latent = irm.irmio.get_latent(irm_model)

        pred = compute_prob_matrix(learned_latent,
                                   irm_data,
                                   model_name="BetaBernoulliNonConj")

        pickle.dump(
            {
                'pred_mat': pred,
                'truth_mat': irm_data['relations']['R1']['data'],
                'thold_i': THOLD_i
            }, open(outfile, 'w'))
Example #6
def create_truth(dbfile, outfiles):
    conn = sqlite3.connect(dbfile)
    for THOLD_i, outfile in zip(THOLDS, outfiles):
        cells, conn_mat, dist_mats = preprocess.create_data(
            conn, process.THOLDS[THOLD_i])

        irm_latent, irm_data = models.create_conn_dist_lowlevel(
            conn_mat, dist_mats, 'xyz', model_name="LogisticDistance")

        irm_latent['relations']['R1']['hps'] = {
            'lambda_hp': 50.0,
            'mu_hp': 50.0,
            'p_max': 0.9,
            'p_min': 0.01
        }

        irm_latent['domains']['d1'][
            'assignment'] = irm.util.canonicalize_assignment(cells['type_id'])

        irm_model = irm.irmio.create_model_from_data(irm_data)
        rng = irm.RNG()
        irm.irmio.set_model_latent(irm_model, irm_latent, rng)
        irm.irmio.estimate_suffstats(irm_model, rng, ITERS=40)

        learned_latent = irm.irmio.get_latent(irm_model)

        pred = compute_prob_matrix(learned_latent, irm_data)

        pickle.dump(
            {
                'pred_mat': pred,
                'truth_mat': irm_data['relations']['R1']['data']['link'],
                'thold_i': THOLD_i
            }, open(outfile, 'w'))
Example #7
def test_slice_normal():
    def dens(x): 
        # mixture of two Gaussians
        mus = [-1.5, 2]
        vars = [1, 1]
        pis = [0.25, 0.75]
        return np.logaddexp.accumulate([(np.log(pi)  +  util.log_norm_dens(x, mu, var)) for (pi, mu, var) in zip(pis, mus, vars)])[-1]
        # return util.log_norm_dens(x, 0, 1.0)

    rng = irm.RNG()
    ITERS = 100000
    
    x = 0
    results = np.zeros(ITERS)
    
    for i in range(ITERS):
        x = irm.slice_sample(x, dens, rng, 50.0)
        results[i] = x
    MIN = -5
    MAX = 5
    BINS = 100
    x = np.linspace(MIN, MAX, BINS)
    bin_width = x[1] - x[0]

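    # evaluate the target log-density at bin centres and normalize it to a discrete pmf for comparison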
    y = [dens(a + bin_width/2) for a in x[:-1]]
    p = np.exp(y)
    p = p/np.sum(p)/(x[1]-x[0])


    hist, bin_edges = np.histogram(results, x, normed=True)

    kl = util.kl(hist, p)
    assert kl < 0.1
Example #8
def test_simple_nonconj_inout():
    rng = irm.RNG()
    irm_model = irmio.create_model_from_data(data_simple_nonconj, rng=rng)
    
    irmio.set_model_latent(irm_model, latent_simple_nonconj, rng=rng)

    latent = irmio.get_latent(irm_model)
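    # check that the extracted latent matches the one we loaded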
    irmio.latent_equality(latent_simple_nonconj, latent, data_simple_nonconj)
Example #9
def run_inference(infile, outfile):
    data = pickle.load(open(infile, 'r'))
    df = data['featuredf']
    df_vals = df[np.isfinite(df['contact_x_mean'])][:100]

    N = len(df_vals)

    desc = {
        'soma_x': {
            'data': to_f32(df_vals['soma_x']),
            'model': 'NormalInverseChiSq'
        },
        # 'contact_spatial_std' : {'data' : to_f32(df_vals['contact_spatial_std']),
        #                          'model' : 'NormalInverseChiSq'},
    }

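    # add one NormalInverseChiSq feature per contact-area histogram bin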
    for i, bi in enumerate(features.BINS[:-1]):
        a = np.array(
            [row['contact_area_hist'][i] for row_i, row in df_vals.iterrows()],
            dtype=np.float32)
        print a
        desc['contact_x_hist_%d' % i] = {
            'data': a,
            'model': 'NormalInverseChiSq'
        }

    latent, data = connattribio.create_mm(desc)
    latent['domains']['d1']['assignment'] = np.arange(N) % 40

    latent, data = irm.data.synth.prior_generate(latent, data)

    rng = irm.RNG()
    irm_model = irm.irmio.create_model_from_data(data, rng=rng)
    irm.irmio.set_model_latent(irm_model, latent, rng)

    kernel_config = irm.runner.default_kernel_anneal()
    kernel_config[0][1]['subkernels'][-1][1]['grids'][
        'NormalInverseChiSq'] = irm.gridgibbshps.default_grid_normal_inverse_chi_sq(
            mu_scale=10, var_scale=1, GRIDN=10)
    kernel_config[0][1]['subkernels'][-1][1]['grids'][
        'r_soma_x'] = soma_x_hp_grid()

    MAX_ITERS = 200
    for i in range(MAX_ITERS):
        irm.runner.do_inference(irm_model, rng, kernel_config, i)

        new_latent = irm.irmio.get_latent(irm_model)
        a = new_latent['domains']['d1']['assignment']

        print irm.util.assign_to_counts(a)
        print "i=", i, "MAX_ITERS=", MAX_ITERS

    pickle.dump({
        'assignment': a,
        'latent': new_latent,
        'data': data
    }, open(outfile, 'w'))
Example #10
def create_init(latent_filename,
                data_filename,
                out_filenames,
                init=None,
                keep_ground_truth=True):
    """ 
    CONVENTION: when we create N inits, the first is actually 
    initialized from the "ground truth" of the intial init (whatever
    that happened to be)

    # FIXME : add ability to init multiple domains
    """
    irm_latent = pickle.load(open(latent_filename, 'r'))
    irm_data = pickle.load(open(data_filename, 'r'))
    irm_latents = []

    rng = irm.RNG()

    irm_model = irm.irmio.create_model_from_data(irm_data, rng=rng)
    for c, out_f in enumerate(out_filenames):
        print "generating init", out_f
        np.random.seed(c)

        latent = copy.deepcopy(irm_latent)

        d_N = len(latent['domains']['d1']['assignment'])
        if init['type'] == 'fixed':
            group_num = init['group_num']

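            # 'fixed' init: round-robin assignment into group_num groups, then shuffled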
            a = np.arange(d_N) % group_num
            a = np.random.permutation(a)

        elif init['type'] == 'crp':
            alpha = init['alpha']
            a = irm.util.crp_draw(d_N, alpha)
            a = np.random.permutation(a)
        elif init['type'] == 'truth':
            a = latent['domains']['d1']['assignment']

        else:
            raise NotImplementedError("Unknown init type")

        if (not keep_ground_truth) or (c > 0):  # first one stays the same
            latent['domains']['d1']['assignment'] = a

        # generate new suffstats, recompute suffstats in light of new assignment

        irm.irmio.set_model_latent(irm_model, latent, rng)
        print "estimating suffstats for %s" % out_f
        irm.irmio.estimate_suffstats(irm_model, rng, ITERS=2)
        print "ss estimation done for ", out_f

        pickle.dump(irm.irmio.get_latent(irm_model), open(out_f, 'w'))
Example #11
def create_init_pure(irm_latent,
                     irm_data,
                     OUT_N,
                     init=None,
                     keep_ground_truth=True):
    """ 
    CONVENTION: when we create N inits, the first is actually 
    initialized from the "ground truth" of the intial init (whatever
    that happened to be)

    # FIXME : add ability to init multiple domains
    """
    irm_latents = []

    rng = irm.RNG()

    irm_model = irm.irmio.create_model_from_data(irm_data, rng=rng)
    for c in range(OUT_N):
        np.random.seed(c)

        latent = copy.deepcopy(irm_latent)

        d_N = len(latent['domains']['d1']['assignment'])
        if init['type'] == 'fixed':
            group_num = init['group_num']

            a = np.arange(d_N) % group_num
            a = np.random.permutation(a)

        elif init['type'] == 'crp':
            alpha = init['alpha']
            a = irm.util.crp_draw(d_N, alpha)
            a = np.random.permutation(a)
        elif init['type'] == 'truth':
            a = latent['domains']['d1']['assignment']

        else:
            raise NotImplementedError("Unknown init type")

        if (not keep_ground_truth) or (c > 0):  # first one stays the same
            latent['domains']['d1']['assignment'] = a

        # generate new suffstats, recompute suffstats in light of new assignment

        irm.irmio.set_model_latent(irm_model, latent, rng)

        irm.irmio.estimate_suffstats(irm_model, rng, ITERS=2)

        yield irm.irmio.get_latent(irm_model)
Example #12
def run_ld(infilename, outfilename, seed):
    ITERS = SAMPLER_ITERS

    np.random.seed(seed)

    indata = pickle.load(open(infilename, 'r'))

    model_name = "LogisticDistance"
    kc = irm.runner.default_kernel_nonconj_config()
    kc[0][1]['M'] = 30

    data = indata['conn_and_dist']

    irm_config = irm.irmio.default_graph_init(data, model_name)

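    # fixed hyperparameters for the LogisticDistance relation model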
    HPS = {'mu_hp': 1.0, 'lambda_hp': 1.0, 'p_min': 0.1, 'p_max': 0.9}
    irm_config['relations']['R1']['hps'] = HPS

    rng = irm.RNG()

    model = irm.irmio.model_from_config(irm_config, init='crp', rng=rng)

    rel = model.relations['R1']
    doms = [(model.domains['t1'], 0), (model.domains['t1'], 0)]
    scores = []
    states = []
    comps = []
    for i in range(ITERS):
        print "iteration", i
        irm.runner.do_inference(model, rng, kc)
        a = model.domains['t1'].get_assignments()

        components = irm.model.get_components_in_relation(doms, rel)

        scores.append(model.total_score())
        states.append(a)
        comps.append(components)

    pickle.dump(
        {
            'scores': scores,
            'states': states,
            'components': comps,
            'infile': infilename,
            'hps': HPS
        }, open(outfilename, 'w'))
Example #13
def test_mixture_bb():
    ENTITY_PER_GROUP = 50
    GROUPS = 4
    N = ENTITY_PER_GROUP * GROUPS
    DIM = 4

    np.random.seed(0)

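    # per-group, per-dimension Bernoulli parameters drawn from a Beta(0.2, 0.2)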
    gv = np.random.beta(0.2, 0.2, size=(GROUPS, DIM))

    mat = np.zeros((N, DIM), dtype=np.uint8)
    for g in range(GROUPS):
        for i in range(ENTITY_PER_GROUP):
            for d in range(DIM):
                mat[g * ENTITY_PER_GROUP + i, d] = np.random.rand() < gv[g, d]

    #mat = np.random.permutation(mat)
    desc = {}
    for d in range(DIM):
        desc['f%d' % d] = {'data': mat[:, d], 'model': 'BetaBernoulli'}

    latent, data = connattribio.create_mm(desc)
    latent['domains']['d1']['assignment'] = np.arange(N) % 10

    latent, data = irm.data.synth.prior_generate(latent, data)

    rng = irm.RNG()
    irm_model = irm.irmio.create_model_from_data(data, rng=rng)
    irm.irmio.set_model_latent(irm_model, latent, rng)

    kernel_config = irm.runner.default_kernel_anneal()

    for i in range(150):
        irm.runner.do_inference(irm_model, rng, kernel_config, i)

        new_latent = irm.irmio.get_latent(irm_model)
        a = new_latent['domains']['d1']['assignment']

        print irm.util.assign_to_counts(a)
        print new_latent['relations']['r_f1']['hps']

    pylab.imshow(mat[np.argsort(a)])
    pylab.show()
Example #14
def test_slice_exp():
    """
    Test on a distribution with support on the positive reals
    """

    def dens(x): 
        # exponential density with rate lamb (support on x >= 0)
        lamb = 2.47
        if x < 0:
            return -np.inf
        else:
            return -x * lamb
        
        # return util.log_norm_dens(x, 0, 1.0)

    rng = irm.RNG()
    ITERS = 1000000
    
    x = 0
    results = np.zeros(ITERS)
    
    for i in range(ITERS):
        x = irm.slice_sample(x, dens, rng, 0.5)
        results[i] = x
    MIN = -1
    MAX = 4
    BINS = 101
    x = np.linspace(MIN, MAX, BINS)
    bin_width = x[1] - x[0]

    y = [dens(a + bin_width/2) for a in x[:-1]]
    p = np.exp(y)
    p = p/np.sum(p)/(x[1]-x[0])


    hist, bin_edges = np.histogram(results, x, normed=True)

    kl = util.kl(hist, p)
    assert kl < 0.1
Example #15
def test_parallel_tempering():
    
    rng = irm.RNG()

    D1_N = 100
    model_name = "BetaBernoulliNonConj"

    d = {'domains' : {'d1' : {'N' : D1_N}}, 
         'relations' : {'R1' : {'relation' : ('d1', 'd1'), 
                                'model' : model_name}}}

    l = {}

    new_latent, new_data = data.synth.prior_generate(l, d)
    
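    # wrap the default nonconjugate kernels in a parallel-tempering kernel over four temperatures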
    config = [('parallel_tempering',
               {'temps': [1.0, 2.0, 4.0, 8.0],
                'subkernels': runner.default_kernel_nonconj_config()})]

    r = runner.Runner(new_latent, new_data, config)
    for i in range(100):
        print "tt", i, r.get_score()
        r.run_iters(1)
Example #16
def test_io():
    N = 10
    desc = {
        'f1': {
            'data': np.zeros(N, dtype=np.bool),
            'model': 'BetaBernoulli'
        }
    }

    latent, data = connattribio.create_mm(desc)

    latent, data = irm.data.synth.prior_generate(latent, data)
    print data
    print latent

    assert_equal(len(latent['domains']), 2)
    assert_equal(len(latent['relations']), 1)
    assert_equal(len(data['domains']), 2)
    assert_equal(len(data['relations']), 1)

    rng = irm.RNG()
    irm_model = irm.irmio.create_model_from_data(data, rng=rng)
    irm.irmio.set_model_latent(irm_model, latent, rng)
Example #17
def test_set_components():
    """
    
    """
    T1_N = 10
    T2_N = 20
    np.random.seed(0)
    rng = irm.RNG()

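    # random binary T1 x T2 data matrix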
    data = np.random.rand(T1_N, T2_N) > 0.5
    data.shape = T1_N, T2_N

    m = models.BetaBernoulli()
    r = Relation([('T1', T1_N), ('T2', T2_N)], data, m)
    hps = m.create_hps()
    hps['alpha'] = 1.0
    hps['beta'] = 1.0

    r.set_hps(hps)

    tf_1 = model.DomainInterface(T1_N, {'r': ('T1', r)})
    tf_1.set_hps({'alpha': 1.0})
    tf_2 = model.DomainInterface(T2_N, {'r': ('T2', r)})
    tf_2.set_hps({'alpha': 1.0})

    T1_GRPN = 4
    t1_assign = np.arange(T1_N) % T1_GRPN
    t1_grps = {}
    for i, gi in enumerate(t1_assign):
        if gi not in t1_grps:
            g = tf_1.create_group(rng)
            t1_grps[gi] = g
        tf_1.add_entity_to_group(t1_grps[gi], i)

    T2_GRPN = 4
    t2_assign = np.arange(T2_N) % T2_GRPN
    t2_grps = {}
    for i, gi in enumerate(t2_assign):
        if gi not in t2_grps:
            g = tf_2.create_group(rng)
            t2_grps[gi] = g
        tf_2.add_entity_to_group(t2_grps[gi], i)

    t1_assign_g = tf_1.get_assignments()
    t2_assign_g = tf_2.get_assignments()

    allmodel = model.IRM({'T1': tf_1, 'T2': tf_2}, {'R1': r})

    lastscore = allmodel.total_score()
    for t1_g in np.unique(t1_assign_g):
        for t2_g in np.unique(t2_assign_g):
            t1_entities = np.argwhere(t1_assign_g == t1_g).flatten()
            t2_entities = np.argwhere(t2_assign_g == t2_g).flatten()

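            # tally observed links (heads) and non-links (tails) for this block of the relation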
            dps = []
            for e1 in t1_entities:
                for e2 in t2_entities:
                    dps.append(data[e1, e2])
            heads = np.sum(np.array(dps) == 1)
            tails = np.sum(np.array(dps) == 0)
            # check if the current value is correct
            c = r.get_component((tf_1.get_relation_groupid(0, t1_g),
                                 tf_2.get_relation_groupid(0, t2_g)))
            assert_equal(heads, c['heads'])
            assert_equal(tails, c['tails'])

            # now we set them to a random value
            c = r.set_component((tf_1.get_relation_groupid(
                0, t1_g), tf_2.get_relation_groupid(0, t2_g)), {
                    'heads': int(heads),
                    'tails': int(tails) + 1
                })

            assert allmodel.total_score() != lastscore
            lastscore = allmodel.total_score()

            c = r.set_component((tf_1.get_relation_groupid(
                0, t1_g), tf_2.get_relation_groupid(0, t2_g)), {
                    'heads': int(heads) + 1,
                    'tails': int(tails) + 1
                })

            assert allmodel.total_score() != lastscore
            lastscore = allmodel.total_score()
Example #18
def score_params():
    for a in (list(t1_t2_datasets()) + list(t1_t1_datasets())):
        latent_filename = a[1][0]
        data_filename = a[1][1]
        outfilename = latent_filename[:-(len("latent"))] + 'scores'
        if 'conj' in latent_filename:
            yield (latent_filename, data_filename), outfilename


@follows(t1_t2_datasets)
@follows(t1_t1_datasets)
@files(score_params)
def score((latent_filename, data_filename), outfilename):
    latent = pickle.load(open(latent_filename, 'r'))
    data = pickle.load(open(data_filename, 'r'))

    rng = irm.RNG()

    irm_model = irmio.create_model_from_data(data, rng=rng)
    irmio.set_model_latent(irm_model, latent, rng)

    # now we go through and score every possible latent
    domain_names = sorted(data['domains'].keys())
    domain_sizes = [data['domains'][dn]['N'] for dn in domain_names]

    # create the dict
    candidate_partitions = list(putil.enumerate_possible_latents(domain_sizes))
    CANDIDATE_N = len(candidate_partitions)
    scores = {}
    for cpi, cp in enumerate(candidate_partitions):
        t1 = time.time()
        for di, av in enumerate(cp):
Example #19
def cluster_z_matrix(z,
                     INIT_GROUPS=100,
                     crp_alpha=5.0,
                     beta=0.1,
                     ITERS=4,
                     method='dpmm_bb'):

    N = len(z)
    # create the data
    if method == 'dpmm_bb':
        model = "BetaBernoulli"
        assert z.dtype == np.bool
        hps = {'alpha': beta, 'beta': beta}

    elif method == "dpmm_gp":
        model = "GammaPoisson"
        assert z.dtype == np.uint32
        hps = {'alpha': 2.0, 'beta': 2.0}

    else:
        raise NotImplementedError("unknown method")

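    # a single domain of N entities with one d1 x d1 relation over z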
    data = {
        'domains': {
            'd1': {
                'N': N
            }
        },
        'relations': {
            'R1': {
                'relation': ('d1', 'd1'),
                'model': model,
                'data': z
            }
        }
    }

    latent_init = {
        'domains': {
            'd1': {
                'assignment': np.arange(N) % INIT_GROUPS,
                'hps': {
                    'alpha': crp_alpha
                }
            }
        },
        'relations': {
            'R1': {
                'hps': hps
            }
        }
    }

    rng = irm.RNG()
    irm_model = irm.irmio.create_model_from_data(data, rng=rng)

    irm.irmio.set_model_latent(irm_model, latent_init, rng=rng)

    run = irm.runner.Runner(latent_init, data,
                            irm.runner.default_kernel_config())
    run.run_iters(ITERS)

    state = run.get_state()
    return irm.util.canonicalize_assignment(
        state['domains']['d1']['assignment'])
Example #20
def run_inference_cxl(infile, outfile):
    np.random.seed(1)
    data = pickle.load(open(infile, 'r'))
    df = data['featuredf']
    df_vals = df[np.isfinite(df['contact_x_mean'])]

    N = len(df_vals)

    # convert into a real list of lists
    contact_x_list = np.zeros(
        N, dtype=irm.models.MixtureModelDistribution().data_dtype())

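    # rescale each cell's contact x positions to [0, 1] and pack them into the fixed-size point array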
    for xi, x in enumerate(df_vals['contact_x_list']):
        # in the event of > 1024 we randomly pick 1024
        y = np.array(x)
        x_min = features.BINS[0]
        x_max = features.BINS[-1]
        y = (y - x_min) / (x_max - x_min)
        # normed to [0, 1]
        contact_x_list[xi]['points'][:len(y)] = y
        contact_x_list[xi]['len'] = len(y)

    desc = {
        'soma_x': {
            'data': to_f32(df_vals['soma_x']),
            'model': 'NormalInverseChiSq'
        },
        # 'contact_spatial_std' : {'data' : to_f32(df_vals['contact_spatial_std']),
        #                          'model' : 'NormalInverseChiSq'},
        'contact_x_list': {
            'data': contact_x_list,
            'model': 'MixtureModelDistribution'
        }
    }

    latent, data = connattribio.create_mm(desc)
    latent['domains']['d1']['assignment'] = np.arange(N) % 40

    COMP_K = 4
    latent['relations']['r_contact_x_list']['hps'] = {
        'comp_k': COMP_K,
        'var_scale': 0.1,
        'dir_alpha': 1.0
    }

    latent, data = irm.data.synth.prior_generate(latent, data)

    rng = irm.RNG()
    irm_model = irm.irmio.create_model_from_data(data, rng=rng)
    irm.irmio.set_model_latent(irm_model, latent, rng)

    kernel_config = irm.runner.default_kernel_anneal(start_temp=64,
                                                     iterations=250)

    kernel_config[0][1]['subkernels'][-1][1]['grids'][
        'r_soma_x'] = soma_x_hp_grid()

    kernel_config[0][1]['subkernels'][-1][1]['grids'][
        'MixtureModelDistribution'] = [{
            'comp_k': 4,
            'dir_alpha': 1.0,
            'var_scale': 0.1
        }]

    MAX_ITERS = 400
    for i in range(MAX_ITERS):
        irm.runner.do_inference(irm_model, rng, kernel_config, i)

        new_latent = irm.irmio.get_latent(irm_model)
        a = new_latent['domains']['d1']['assignment']

        print irm.util.assign_to_counts(a)
        print "i=", i, "MAX_ITERS=", MAX_ITERS

    pickle.dump({
        'assignment': a,
        'latent': new_latent,
        'data': data
    }, open(outfile, 'w'))
Example #21
def test_slice_nonconj():
    T1_N = 10
    T2_N = 20
    np.random.seed(0)
    rng = irm.RNG()

    data = np.random.rand(T1_N, T2_N) > 0.5
    data.shape = T1_N, T2_N

    m = models.BetaBernoulliNonConj()
    r = irm.Relation([('T1', T1_N), ('T2', T2_N)],
                     data, m)
    hps = m.create_hps()
    hps['alpha'] = 1.0
    hps['beta'] = 1.0

    r.set_hps(hps)

    tf_1 = model.DomainInterface(T1_N, {'r': ('T1', r)})
    tf_1.set_hps({'alpha' : 1.0})
    tf_2 = model.DomainInterface(T2_N, {'r' : ('T2', r)})
    tf_2.set_hps({'alpha' : 1.0})

    T1_GRPN = 4
    t1_assign = np.arange(T1_N) % T1_GRPN
    t1_grps = {}
    for i, gi in enumerate(t1_assign):
        if gi not in t1_grps:
            g = tf_1.create_group(rng)
            t1_grps[gi] = g
        tf_1.add_entity_to_group(t1_grps[gi], i)

    T2_GRPN = 4
    t2_assign = np.arange(T2_N) % T2_GRPN
    t2_grps = {}
    for i, gi in enumerate(t2_assign):
        if gi not in t2_grps:
            g = tf_2.create_group(rng)
            t2_grps[gi] = g
        tf_2.add_entity_to_group(t2_grps[gi], i)


    t1_assign_g = tf_1.get_assignments()
    t2_assign_g = tf_2.get_assignments()

    # build list of coords / heads/tails
    coord_data = {}
    for t1_g in np.unique(t1_assign_g):
        for t2_g in np.unique(t2_assign_g):
            t1_entities = np.argwhere(t1_assign_g == t1_g).flatten()
            t2_entities = np.argwhere(t2_assign_g == t2_g).flatten()
            
            dps = []
            for e1 in t1_entities:
                for e2 in t2_entities:
                    dps.append(data[e1, e2])
            heads = np.sum(np.array(dps) == 1)
            tails = np.sum(np.array(dps) == 0)
            # coords = ((tf_1.get_relation_groupid(0, t1_g), 
            #            tf_2.get_relation_groupid(0, t2_g)))
            coord_data[(t1_g, t2_g)] = (heads, tails)

    # get all the components from this relation
    # now the histograms

    for alpha, beta in [(1.0, 1.0), (10.0, 1.0),
                        (1.0, 10.0), (0.1, 5.0)]:
        coords_hist = {k : [] for k in coord_data}

        print "alpha=", alpha, "beta=", beta, "="*50
        hps['alpha'] = alpha
        hps['beta'] = beta

        r.set_hps(hps)

        ITERS = 100000
        for i in range(ITERS):
            r.apply_comp_kernel("slice_sample", rng, {'width' : 0.4})

            component_data = model.get_components_in_relation(
                [(tf_1, 0), (tf_2, 0)], r)

            for c in coord_data:
                coords_hist[c].append(component_data[c]['p'])
        for c in coords_hist:
            heads, tails = coord_data[c]
            empirical_p = np.mean(coords_hist[c])
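            # posterior mean of p under a Beta(alpha, beta) prior given the observed heads and tails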
            true_map_p = float(heads + alpha) / (heads + tails + alpha + beta)
            print empirical_p - true_map_p
            np.testing.assert_approx_equal(empirical_p, true_map_p, 2)