Beispiel #1
0
def train():
    """Train an LDA model in the requested mode and save it to disk.

    Command-line arguments (read from ``sys.argv``):
        $1  training mode, either ``'online'`` or ``'batch'``
        $2  path to config file

    The config supplies ``k``, ``mm_path``, ``var_path``, ``minibatch`` and
    ``dict_path``.  The trained model is saved to ``<var_path>/lda`` and the
    elapsed time is printed at the end.  Any other mode trains nothing; an
    error is logged so the no-op does not go unnoticed.
    """
    start = datetime.now()
    logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s',
                        level=logging.INFO)

    # Read every setting up front so a missing key fails before the corpus
    # is loaded.
    mode = sys.argv[1]
    config = get_config(sys.argv[2])
    k = config['k']
    mm_path = config['mm_path']
    var_path = config['var_path']
    minibatch = config['minibatch']
    dict_path = config['dict_path']

    corpus, dictionary = corpus_dictionary(mm_path, dict_path)
    output_fn = os.path.join(var_path, 'lda')

    if mode == 'online':
        lda = online_lda(corpus, dictionary, k, minibatch)
        lda.save(output_fn)
    elif mode == 'batch':
        lda = batch_lda(corpus, dictionary, k)
        lda.save(output_fn)
    else:
        logging.error("unknown training mode %r (expected 'online' or "
                      "'batch'); no model was trained", mode)

    end = datetime.now()
    # Parenthesised form prints the same under Python 2 and Python 3.
    print(end - start)
Beispiel #2
0
def train():
    """Train an LDA model in the requested mode and save it to disk.

    Command-line arguments (read from ``sys.argv``):
        $1  training mode, either ``'online'`` or ``'batch'``
        $2  path to config file

    The config supplies ``k``, ``mm_path``, ``var_path``, ``minibatch`` and
    ``dict_path``.  The trained model is saved to ``<var_path>/lda`` and the
    elapsed time is printed at the end.  Any other mode trains nothing; an
    error is logged so the no-op does not go unnoticed.
    """
    start = datetime.now()
    logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s',
                        level=logging.INFO)

    # Read every setting up front so a missing key fails before the corpus
    # is loaded.
    mode = sys.argv[1]
    config = get_config(sys.argv[2])
    k = config['k']
    mm_path = config['mm_path']
    var_path = config['var_path']
    minibatch = config['minibatch']
    dict_path = config['dict_path']

    corpus, dictionary = corpus_dictionary(mm_path, dict_path)
    output_fn = os.path.join(var_path, 'lda')

    if mode == 'online':
        lda = online_lda(corpus, dictionary, k, minibatch)
        lda.save(output_fn)
    elif mode == 'batch':
        lda = batch_lda(corpus, dictionary, k)
        lda.save(output_fn)
    else:
        logging.error("unknown training mode %r (expected 'online' or "
                      "'batch'); no model was trained", mode)

    end = datetime.now()
    # Parenthesised form prints the same under Python 2 and Python 3.
    print(end - start)
Beispiel #3
0
def main():
    """Run the MPI job: rank 0 acts as the master, every other rank as a worker.

    The master reads the config file whose path is given in ``sys.argv[2]``
    (``sys.argv[1]`` is not read by this function), loads the corpus from
    ``mm_path``, calls ``master_process`` and writes the returned ``eta`` to
    ``<var_path>/eta.final.pickle``.  It then prints the elapsed time.
    Every other rank calls ``worker_process`` with its processor name.
    """
    comm = MPI.COMM_WORLD  # MPI communicator object
    rank = comm.rank  # rank of this process
    status = MPI.Status()  # MPI status object
    tags = enum('READY', 'DONE', 'EXIT', 'START')

    if rank != 0:
        # Worker process
        name = MPI.Get_processor_name()
        worker_process(comm, status, tags, name)
        return

    # Master process
    start = datetime.now()
    logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s',
                        level=logging.INFO)

    # Load the settings.
    # NOTE: the config's 'asyn' entry is expected to be 'mpi' for this entry
    # point; it is not checked here.
    config = get_config(sys.argv[2])
    k = config['k']
    nthread = config['nthread']
    mm_path = config['mm_path']
    var_path = config['var_path']
    minibatch = config['minibatch']
    corpus = _mCorpus.get_corpus(mm_path)
    V = corpus.num_terms

    eta = master_process(comm, status, tags, corpus, k, V, nthread,
                         minibatch, var_path)

    # Store the final pickle.
    path = os.path.join(var_path, 'eta.final.pickle')
    _mea.write_eta(eta, path)

    end = datetime.now()
    # Parenthesised form prints the same under Python 2 and Python 3.
    print(end - start)
Beispiel #4
0
def main():
    """Run the MPI job: rank 0 acts as the master, every other rank as a worker.

    The master reads the config file whose path is given in ``sys.argv[2]``
    (``sys.argv[1]`` is not read by this function), loads the corpus from
    ``mm_path``, calls ``master_process`` and writes the returned ``eta`` to
    ``<var_path>/eta.final.pickle``.  It then prints the elapsed time.
    Every other rank calls ``worker_process`` with its processor name.
    """
    comm = MPI.COMM_WORLD  # MPI communicator object
    rank = comm.rank  # rank of this process
    status = MPI.Status()  # MPI status object
    tags = enum('READY', 'DONE', 'EXIT', 'START')

    if rank != 0:
        # Worker process
        name = MPI.Get_processor_name()
        worker_process(comm, status, tags, name)
        return

    # Master process
    start = datetime.now()
    logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s',
                        level=logging.INFO)

    # Load the settings.
    # NOTE: the config's 'asyn' entry is expected to be 'mpi' for this entry
    # point; it is not checked here.
    config = get_config(sys.argv[2])
    k = config['k']
    nthread = config['nthread']
    mm_path = config['mm_path']
    var_path = config['var_path']
    minibatch = config['minibatch']
    corpus = _mCorpus.get_corpus(mm_path)
    V = corpus.num_terms

    eta = master_process(comm, status, tags, corpus, k, V, nthread,
                         minibatch, var_path)

    # Store the final pickle.
    path = os.path.join(var_path, 'eta.final.pickle')
    _mea.write_eta(eta, path)

    end = datetime.now()
    # Parenthesised form prints the same under Python 2 and Python 3.
    print(end - start)