Code Example #1
File: learn.py  Project: yyan162/tribeflow
def fit(trace_fpath, num_topics, alpha_zh, beta_zs, kernel, \
        residency_priors, num_iter, burn_in, from_=0, to=np.inf):
    '''
    Learns the latent topics from a temporal hypergraph trace. 

    Node-Sherlock is an EM algorithm in which the E-step is a Gibbs sampling
    update, hence the use of a `num_iter` and `burn_in` approach instead of
    the usual convergence-based stopping criterion.

    Parameters
    ----------
    trace_fpath : str
        The path of the trace. Each line should be a
        (timestamp, hypernode, source, destination) record, where the
        timestamp is a long (seconds or milliseconds from epoch).

    num_topics : int
        The number of latent spaces to learn

    alpha_zh : float
        The value of the alpha_zh hyperparameter

    beta_zs : float
        The value of the beta_zs (beta) hyperparameter

    kernel : Kernel object
        The kernel to use

    residency_priors : array of float
        The kernel hyper parameters

    num_iter : int
        The number of iterations used to learn the model

    burn_in : int
        The burn_in of the chain
    
    Returns
    -------
    
    A dictionary with the results.
    '''
    Dts, Trace, previous_stamps, Count_zh, Count_sz, \
            count_h, count_z, prob_topics_aux, Theta_zh, Psi_sz, \
            hyper2id, source2id = \
            dataio.initialize_trace(trace_fpath, num_topics, num_iter, \
            from_, to)
    
    em(Dts, Trace, previous_stamps, Count_zh, \
            Count_sz, count_h, count_z, alpha_zh, beta_zs, \
            prob_topics_aux, Theta_zh, Psi_sz, num_iter, \
            burn_in, kernel)
    
    rv = prepare_results(trace_fpath, num_topics, alpha_zh, beta_zs, \
            kernel, residency_priors, num_iter, burn_in, Dts, Trace, \
            Count_zh, Count_sz, count_h, \
            count_z, prob_topics_aux, Theta_zh, Psi_sz, hyper2id, \
            source2id, from_, to)
    rv['algorithm'] = np.asarray(['serial gibbs + em'])
    return rv
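
A minimal usage sketch may help make the signature concrete. Treat it as hedged: only the fit() signature and the trace format come from the docstring above, while the kernel object is a placeholder (the excerpt only tells us fit() expects something honoring the Kernel interface) and the hyperparameter values are arbitrary.

# Hedged usage sketch -- NOT project code. SomeKernel is a PLACEHOLDER
# for whichever Kernel class the tribeflow project provides; the trace
# format and the fit() signature are taken from the docstring above.
import numpy as np
from learn import fit

# trace.tsv: one record per line, e.g.
#   1388534400  user42  songA  songB
# i.e. (timestamp, hypernode, source, destination)

kernel = SomeKernel()  # PLACEHOLDER kernel object
rv = fit('trace.tsv', num_topics=10, alpha_zh=0.1, beta_zs=0.1,
         kernel=kernel, residency_priors=np.array([]),  # arbitrary values
         num_iter=2000, burn_in=1000)
print(rv['algorithm'])  # -> ['serial gibbs + em']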
Code Example #2
File: plearn.py  Project: flaviovdf/tribeflow
def sample(Dts, Trace, Count_zh, Count_sz_local, \
        count_h, count_z, alpha_zh, beta_zs, kernel, num_iter, comm):
    
    previous_encounters_s = {}
    for other_processor in xrange(1, comm.size):
        previous_encounters_s[other_processor] = np.zeros_like(Count_sz_local)

    stamps = StampLists(Count_zh.shape[0])
    for z in xrange(Count_zh.shape[0]):
        idx = Trace[:, -1] == z
        #dts_assigned = Dts[idx:, 0].ravel().copy()
        #np.sort(dts_assigned)
        stamps._extend(z, Dts[idx][:, -1])
    
    aux = np.zeros(Count_zh.shape[0], dtype='f8')
 
    Count_sz_pair = np.zeros_like(Count_sz_local)
    Count_sz_others = np.zeros_like(Count_sz_local)
    Count_sz_sum = np.zeros_like(Count_sz_local)

    Theta_zh = np.zeros_like(Count_zh, dtype='f8')
    Psi_sz = np.zeros_like(Count_sz_local, dtype='f8')
    
    can_pair = True
    for i in xrange(num_iter // CACHE_SIZE):
        #Sample from the local counts and encountered counts
        Count_sz_sum[:] = Count_sz_local + Count_sz_others
        count_z[:] = Count_sz_sum.sum(axis=0)
        
        em(Dts, Trace, stamps, Count_zh, Count_sz_sum, \
                count_h, count_z, alpha_zh, beta_zs, aux, Theta_zh, \
                Psi_sz, CACHE_SIZE, CACHE_SIZE * 2, kernel, False)

        #Update local counts
        Count_sz_local[:] = Count_sz_sum - Count_sz_others
        count_z[:] = Count_sz_local.sum(axis=0)

        #Update expected belief of other processors
        if can_pair:
            P_local = kernel.get_state()
            can_pair = paired_update(comm, previous_encounters_s, \
                    Count_sz_local, Count_sz_pair, Count_sz_others, \
                    P_local, np.zeros_like(P_local))
            kernel.update_state(P_local)
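
The part of sample() that is easy to miss is the AD-LDA-style count bookkeeping: each worker runs EM against its own counts plus its current belief about every other worker's counts (Count_sz_others), then recovers its own contribution by subtracting that belief back out before paired_update refreshes it with a partner process (how partners are chosen is not shown in this excerpt). A self-contained toy sketch of just that add/sample/subtract pattern, with invented shapes and values:

import numpy as np

# Toy numbers; only the bookkeeping pattern mirrors sample() above.
Count_sz_local = np.array([[3, 1], [0, 2]], dtype='i8')   # this worker's counts
Count_sz_others = np.array([[1, 0], [2, 1]], dtype='i8')  # belief about peers

# 1. Sample against the combined view of the world.
Count_sz_sum = Count_sz_local + Count_sz_others
Count_sz_sum[0, 0] += 1   # stand-in for the in-place Gibbs/EM updates
Count_sz_sum[1, 1] -= 1

# 2. The peer belief was held fixed during sampling, so subtracting it
#    back out recovers the worker's updated local counts.
Count_sz_local = Count_sz_sum - Count_sz_others
print(Count_sz_local)     # [[4 1]
                          #  [0 1]]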
Code Example #3
File: dynamic.py  Project: yyan162/tribeflow
def fit(trace_fpath, num_topics, alpha_zh, beta_zs, kernel, \
        residency_priors, num_iter, num_batches, mpi_mode, from_=0, to=np.inf):
    '''
    Learns the latent topics from a temporal hypergraph trace. Here we do an
    asynchronous learning of the topics similar to AD-LDA, as well as
    dynamic topic expansion/pruning.

    Parameters
    ----------
    trace_fpath : str
        The path of the trace. Each line should be a
        (timestamp, hypernode, source, destination) record, where the
        timestamp is a long (seconds or milliseconds from epoch).

    num_topics : int
        The number of latent spaces to learn

    alpha_zh : float
        The value of the alpha_zh hyperparameter

    beta_zs : float
        The value of the beta_zs (beta) hyperparameter

    kernel : Kernel object
        The kernel to use

    residency_priors : array of float
        The kernel hyper parameters

    num_iter : int
        The number of iterations used to learn the model
    
    num_batches : int
        The number of batches, each running num_iter iterations

    mpi_mode : bool
        If True, sampling is dispatched to MPI worker ranks; otherwise the
        EM/sampling step runs serially in this process

    Returns
    -------
    
    A dictionary with the results.
    '''
    assert num_batches >= 2 
    comm = MPI.COMM_WORLD
    num_workers = comm.size - 1

    Dts, Trace, previous_stamps, Count_zh, Count_sz, \
            count_h, count_z, prob_topics_aux, Theta_zh, Psi_sz, \
            hyper2id, source2id = \
            dataio.initialize_trace(trace_fpath, num_topics, num_iter, \
            from_, to)
    
    if mpi_mode:
        workloads = generate_workload(Count_zh.shape[1], num_workers, Trace)
    all_idx = np.arange(Trace.shape[0], dtype='i4')
    
    for batch in xrange(num_batches):
        print('Now at batch', batch)
        if mpi_mode:
            for worker_id in xrange(1, num_workers + 1):
                comm.send(num_iter, dest=worker_id, tag=Msg.LEARN.value)

            dispatch_jobs(Dts, Trace, Count_zh, Count_sz, \
                    count_h, count_z, alpha_zh, beta_zs, kernel, \
                    residency_priors, workloads, num_workers, comm)
            manage(comm, num_workers)
            fetch_results(comm, num_workers, workloads, Dts, Trace, \
                    previous_stamps, Count_zh, Count_sz, count_h, \
                    count_z, alpha_zh, beta_zs, Theta_zh, Psi_sz, \
                    kernel)
        else:
            prob_topics_aux = np.zeros(Count_zh.shape[0], dtype='f8')
            _learn.em(Dts, Trace, previous_stamps, Count_zh, Count_sz, \
                    count_h, count_z, alpha_zh, beta_zs, \
                    prob_topics_aux, Theta_zh, Psi_sz, num_iter, \
                    num_iter * 2, kernel, False)
        
        print('Split')
        ll_per_z = np.zeros(count_z.shape[0], dtype='f8')
        _eval.quality_estimate(Dts, Trace, previous_stamps, \
                Count_zh, Count_sz, count_h, count_z, alpha_zh, \
                beta_zs, ll_per_z, all_idx, kernel)
        Trace, Count_zh, Count_sz, count_z, previous_stamps, \
                P = split(Dts, Trace, previous_stamps, Count_zh, \
                Count_sz, count_h, count_z, alpha_zh, beta_zs, \
                ll_per_z, kernel)
        kernel = kernel.__class__()
        kernel.build(Trace.shape[0], Count_zh.shape[0], residency_priors)
        if residency_priors.shape[0] > 0:
            kernel.update_state(P)

        print('Merge')
        ll_per_z = np.zeros(count_z.shape[0], dtype='f8')
        _eval.quality_estimate(Dts, Trace, previous_stamps, \
                Count_zh, Count_sz, count_h, count_z, alpha_zh, \
                beta_zs, ll_per_z, all_idx, kernel)
        Trace, Count_zh, Count_sz, count_z, previous_stamps, \
                P = merge(Dts, Trace, previous_stamps, Count_zh, \
                Count_sz, count_h, count_z, alpha_zh, beta_zs, \
                ll_per_z, kernel)
        kernel = kernel.__class__()
        kernel.build(Trace.shape[0], Count_zh.shape[0], residency_priors)
        if residency_priors.shape[0] > 0:
            kernel.update_state(P)
 
        Theta_zh = np.zeros(shape=Count_zh.shape, dtype='f8')
        Psi_sz = np.zeros(shape=Count_sz.shape, dtype='f8')
        if batch == num_batches - 1:
            print('Computing probs')
            _learn._aggregate(Count_zh, Count_sz, count_h, count_z, \
                    alpha_zh, beta_zs, Theta_zh, Psi_sz)
        print('New nz', Count_zh.shape[0])
    if mpi_mode:
        for worker_id in xrange(1, num_workers + 1):
            comm.send(num_iter, dest=worker_id, tag=Msg.STOP.value)
    
    rv = prepare_results(trace_fpath, num_topics, alpha_zh, beta_zs, \
            kernel, residency_priors, num_iter, -1, Dts, Trace, \
            Count_zh, Count_sz, count_h, count_z, prob_topics_aux, Theta_zh, \
            Psi_sz, hyper2id, source2id, from_, to)

    rv['num_workers'] = np.asarray([num_workers])
    rv['num_batches'] = np.asarray([num_batches])
    rv['algorithm'] = np.asarray(['parallel dynamic'])
    return rv
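
Because this fit() talks to worker ranks through MPI.COMM_WORLD when mpi_mode is on (note num_workers = comm.size - 1), it must be launched under an MPI runner with at least one rank beyond the master. A hedged launcher sketch: the rank-0/master split matches the sends in fit() above, but the worker-side loop, the kernel, and the priors are placeholders since they are not part of this excerpt.

# Hedged launcher sketch -- NOT project code.
from mpi4py import MPI

comm = MPI.COMM_WORLD
if comm.rank == 0:
    # Rank 0 acts as the master: fit() sends Msg.LEARN / Msg.STOP to
    # ranks 1..comm.size-1 and collects their counts.
    rv = fit('trace.tsv', num_topics=10, alpha_zh=0.1, beta_zs=0.1,
             kernel=kernel, residency_priors=priors,  # PLACEHOLDERS
             num_iter=1000, num_batches=4, mpi_mode=True)
else:
    worker_loop()  # PLACEHOLDER for the project's worker-side receive loop

# Launched as, e.g.:
#   mpiexec -n 5 python run_dynamic.py    # 1 master + 4 workers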