def test_full_learn_null():
    """Smoke test: fitting a 1000-event trace with a NoopKernel and empty
    residency priors must keep every count matrix consistent and every
    estimated distribution column-stochastic."""
    kernel = NoopKernel()
    kernel.build(1, 20, np.zeros(0, dtype='d'))
    result = learn.fit(files.SIZE1K, 20, .1, .1, kernel,
                       np.zeros(0, dtype='d'), 1000, 200)

    # Every one of the 1000 events is accounted for in the count matrices
    # (source counts appear twice per transition, hence 2000).
    assert_equal(result['Count_zh'].sum(), 1000)
    assert_equal(result['Count_sz'].sum(), 2000)
    assert_equal(result['count_h'].sum(), 1000)
    assert_equal(result['count_z'].sum(), 2000)

    # Exactly one topic assignment per event.
    assert result['assign'].shape == (1000, )

    # Theta and Psi columns must each sum to one.
    assert_almost_equal(1, result['Theta_zh'].sum(axis=0))
    assert_almost_equal(1, result['Psi_sz'].sum(axis=0))
# Beispiel #2 (scrape-artifact separator; "0" vote count removed into this comment)
def main():
    """Parse CLI arguments, learn a topic model from a trace file and save
    the resulting model to an h5 file.

    Runs in one of three modes: single-thread (no MPI or one process),
    dynamic mini-batch learning, or MPI-parallel learning via ``plearn``.
    """

    def str2bool(value):
        # argparse's type=bool treats ANY non-empty string as True
        # (so ``--dynamic False`` would enable the dynamic strategy);
        # parse common textual booleans explicitly instead.
        return str(value).lower() in ('true', '1', 'yes', 'y', 't')

    parser = argparse.ArgumentParser()

    parser.add_argument('trace_fpath', help='The trace to learn topics from', \
            type=str)
    parser.add_argument('num_topics', help='The number of topics to learn', \
            type=int)
    parser.add_argument('model_fpath', \
            help='The name of the model file (a h5 file)', type=str)

    parser.add_argument('--num_iter', help='The number of iterations', \
            type=int, default=1000)
    parser.add_argument('--burn_in', \
            help='The burn in (ignored if using mpi)', type=int, \
            default=300)

    parser.add_argument('--dynamic', \
            help='If we should employ the dynamic strategy', \
            type=str2bool, default=False)
    parser.add_argument('--num_batches', \
            help='Number of batches in dynamic case', type=int, default=10)

    parser.add_argument('--alpha_zh', \
            help='Value of alpha_zh (alpha) hyper. Defaults to 50 / nz', \
            type=float, default=None)
    parser.add_argument('--beta_zs', help='Value of beta_zs (beta) hyper', \
            type=float, default=0.001)

    parser.add_argument('--kernel',
                        choices=kernels.names,
                        help='The kernel to use',
                        type=str,
                        default='noop')
    parser.add_argument('--residency_priors',
                        nargs='+',
                        help='Priors for the residency time dist',
                        type=float,
                        default=None)

    parser.add_argument('--leaveout', \
            help='The number of transitions to leave for test', type=float, \
            default=0)

    args = parser.parse_args()

    # Detect MPI. The original code queried MPI.COMM_WORLD unguarded
    # before the try block, which defeated the fallback: a failing MPI
    # would crash before the except could run. Keep ALL MPI access
    # inside the try and fall back to single-thread mode on any error.
    rank = 0
    try:
        comm = MPI.COMM_WORLD
        rank = comm.rank
        size = comm.size
        single_thread = size <= 1
    except Exception:
        single_thread = True

    if not single_thread and rank != plearn.MASTER:
        # Worker processes only serve sampling requests.
        plearn.work()
    else:
        started = time.mktime(time.localtime())
        # Number of transitions == number of lines in the trace file.
        with open(args.trace_fpath) as trace_file:
            num_lines = sum(1 for _ in trace_file)

        if args.leaveout > 0:
            # leaveout is a fraction in (0, 1]; cap it at 1.
            leave_out = min(1, args.leaveout)
            if leave_out == 1:
                print('Leave out is 1 (100%), nothing todo')
                return
            from_ = 0
            to = int(num_lines - num_lines * leave_out)
        else:
            from_ = 0
            to = np.inf  # use the whole trace

        if args.alpha_zh is None:
            # Standard LDA-style heuristic: alpha = 50 / num_topics.
            alpha_zh = 50.0 / args.num_topics
        else:
            alpha_zh = args.alpha_zh

        kernel = kernels.names[args.kernel]()
        if args.residency_priors:
            residency_priors = np.array(args.residency_priors, dtype='d')
        else:
            residency_priors = np.zeros(shape=(0, ), dtype='d')

        # Size the kernel to the number of transitions actually used.
        if np.isinf(to):
            kernel.build(num_lines - from_, args.num_topics, residency_priors)
        else:
            kernel.build(to - from_, args.num_topics, residency_priors)

        if single_thread:
            print(
                'Not on MPI mode or just one MPI proc, running single thread')
            if args.dynamic:
                # Split the iteration budget evenly across the batches.
                num_iter = args.num_iter // args.num_batches
                rv = dynamic.fit(args.trace_fpath, args.num_topics, alpha_zh, \
                        args.beta_zs, kernel, residency_priors, \
                        num_iter, args.num_batches, False, from_=from_, to=to)
            else:
                rv = learn.fit(args.trace_fpath, args.num_topics, alpha_zh, \
                        args.beta_zs, kernel, residency_priors, \
                        args.num_iter, args.burn_in, from_=from_, to=to)
        else:
            if args.dynamic:
                num_iter = args.num_iter // args.num_batches
                rv = dynamic.fit(args.trace_fpath, args.num_topics, alpha_zh, \
                        args.beta_zs, kernel, residency_priors, num_iter, \
                        args.num_batches, True, from_=from_, to=to)
            else:
                rv = plearn.fit(args.trace_fpath, args.num_topics, alpha_zh, \
                        args.beta_zs, kernel, residency_priors, args.num_iter, \
                        from_=from_, to=to)

        ended = time.mktime(time.localtime())
        rv['training_time'] = np.array([ended - started])
        dataio.save_model(args.model_fpath, rv)
        print('Learning took', ended - started, 'seconds')
# Beispiel #3 (scrape-artifact separator; "0" vote count removed into this comment)
def main():
    """Parse CLI arguments, learn a topic model from a trace file and save
    the resulting model to an h5 file.

    Runs in one of three modes: single-thread (no MPI or one process),
    dynamic mini-batch learning, or MPI-parallel learning via ``plearn``.
    """

    def str2bool(value):
        # argparse's type=bool treats ANY non-empty string as True
        # (so ``--dynamic False`` would enable the dynamic strategy);
        # parse common textual booleans explicitly instead.
        return str(value).lower() in ('true', '1', 'yes', 'y', 't')

    parser = argparse.ArgumentParser()

    parser.add_argument('trace_fpath', help='The trace to learn topics from', \
            type=str)
    parser.add_argument('num_topics', help='The number of topics to learn', \
            type=int)
    parser.add_argument('model_fpath', \
            help='The name of the model file (a h5 file)', type=str)

    parser.add_argument('--num_iter', help='The number of iterations', \
            type=int, default=1000)
    parser.add_argument('--burn_in', \
            help='The burn in (ignored if using mpi)', type=int, \
            default=300)

    parser.add_argument('--dynamic', \
            help='If we should employ the dynamic strategy', \
            type=str2bool, default=False)
    parser.add_argument('--num_batches', \
            help='Number of batches in dynamic case', type=int, default=10)

    parser.add_argument('--alpha_zh', \
            help='Value of alpha_zh (alpha) hyper. Defaults to 50 / nz', \
            type=float, default=None)
    parser.add_argument('--beta_zs', help='Value of beta_zs (beta) hyper', \
            type=float, default=0.001)

    parser.add_argument('--kernel', choices=kernels.names,
            help='The kernel to use', type=str, default='noop')
    parser.add_argument('--residency_priors', nargs='+',
            help='Priors for the residency time dist', type=float, default=None)

    parser.add_argument('--leaveout', \
            help='The number of transitions to leave for test', type=float, \
            default=0)

    args = parser.parse_args()

    # Detect MPI. The original code queried MPI.COMM_WORLD unguarded
    # before the try block, which defeated the fallback: a failing MPI
    # would crash before the except could run. Keep ALL MPI access
    # inside the try and fall back to single-thread mode on any error.
    rank = 0
    try:
        comm = MPI.COMM_WORLD
        rank = comm.rank
        size = comm.size
        single_thread = size <= 1
    except Exception:
        single_thread = True

    if not single_thread and rank != plearn.MASTER:
        # Worker processes only serve sampling requests.
        plearn.work()
    else:
        started = time.mktime(time.localtime())
        # Number of transitions == number of lines in the trace file.
        with open(args.trace_fpath) as trace_file:
            num_lines = sum(1 for _ in trace_file)

        if args.leaveout > 0:
            # leaveout is a fraction in (0, 1]; cap it at 1.
            leave_out = min(1, args.leaveout)
            if leave_out == 1:
                print('Leave out is 1 (100%), nothing todo')
                return
            from_ = 0
            to = int(num_lines - num_lines * leave_out)
        else:
            from_ = 0
            to = np.inf  # use the whole trace

        if args.alpha_zh is None:
            # Standard LDA-style heuristic: alpha = 50 / num_topics.
            alpha_zh = 50.0 / args.num_topics
        else:
            alpha_zh = args.alpha_zh

        kernel = kernels.names[args.kernel]()
        if args.residency_priors:
            residency_priors = np.array(args.residency_priors, dtype='d')
        else:
            residency_priors = np.zeros(shape=(0, ), dtype='d')

        # Size the kernel to the number of transitions actually used.
        if np.isinf(to):
            kernel.build(num_lines - from_, args.num_topics, residency_priors)
        else:
            kernel.build(to - from_, args.num_topics, residency_priors)

        if single_thread:
            print('Not on MPI mode or just one MPI proc, running single thread')
            if args.dynamic:
                # Split the iteration budget evenly across the batches.
                num_iter = args.num_iter // args.num_batches
                rv = dynamic.fit(args.trace_fpath, args.num_topics, alpha_zh, \
                        args.beta_zs, kernel, residency_priors, \
                        num_iter, args.num_batches, False, from_=from_, to=to)
            else:
                rv = learn.fit(args.trace_fpath, args.num_topics, alpha_zh, \
                        args.beta_zs, kernel, residency_priors, \
                        args.num_iter, args.burn_in, from_=from_, to=to)
        else:
            if args.dynamic:
                num_iter = args.num_iter // args.num_batches
                rv = dynamic.fit(args.trace_fpath, args.num_topics, alpha_zh, \
                        args.beta_zs, kernel, residency_priors, num_iter, \
                        args.num_batches, True, from_=from_, to=to)
            else:
                rv = plearn.fit(args.trace_fpath, args.num_topics, alpha_zh, \
                        args.beta_zs, kernel, residency_priors, args.num_iter, \
                        from_=from_, to=to)

        ended = time.mktime(time.localtime())
        rv['training_time'] = np.array([ended - started])
        dataio.save_model(args.model_fpath, rv)
        print('Learning took', ended - started, 'seconds')