def train():
    '''Train a model as described by a config file and pickle the final eta.

    The config file path is taken from ``sys.argv[2]``.
    NOTE(review): the previous docstring called this "$1"; presumably
    ``sys.argv[1]`` carries a sub-command -- confirm against the caller.

    Config keys read: ``k``, ``nthread``, ``asyn``, ``mm_path``,
    ``var_path`` and ``minibatch``.  The resulting eta is written to
    ``<var_path>/eta.final.pickle`` and the elapsed time is printed.
    '''
    started_at = datetime.now()
    logging.basicConfig(
        format='%(asctime)s : %(levelname)s : %(message)s',
        level=logging.INFO)

    # Load the run configuration.
    cfg = get_config(sys.argv[2])
    k = cfg['k']
    n_threads = cfg['nthread']
    use_async = cfg['asyn']
    corpus_path = cfg['mm_path']
    out_dir = cfg['var_path']
    minibatch = cfg['minibatch']

    corpus = _mCorpus.get_corpus(corpus_path)
    num_terms = corpus.num_terms

    # Both frameworks share the same leading arguments; the synchronous
    # one additionally receives a trailing True.
    shared_args = (corpus, k, num_terms, n_threads, minibatch, out_dir)
    if use_async:
        eta = _masynf.asyn_framework(*shared_args)
    else:
        eta = _msynf.syn_framework(*(shared_args + (True,)))

    # Persist the final eta next to the other outputs in var_path.
    _mea.write_eta(eta, os.path.join(out_dir, 'eta.final.pickle'))

    elapsed = datetime.now() - started_at
    print(elapsed)
def train():
    '''Train a model as described by a config file and pickle the final eta.

    The config file path is taken from ``sys.argv[2]``.
    NOTE(review): the previous docstring called this "$1"; presumably
    ``sys.argv[1]`` carries a sub-command -- confirm against the caller.

    Config keys read: ``k``, ``nthread``, ``asyn``, ``mm_path``,
    ``var_path`` and ``minibatch``.  The resulting eta is written to
    ``<var_path>/eta.final.pickle`` and the elapsed time is printed.
    '''
    started_at = datetime.now()
    logging.basicConfig(
        format='%(asctime)s : %(levelname)s : %(message)s',
        level=logging.INFO)

    # Load the run configuration.
    cfg = get_config(sys.argv[2])
    k = cfg['k']
    n_threads = cfg['nthread']
    use_async = cfg['asyn']
    corpus_path = cfg['mm_path']
    out_dir = cfg['var_path']
    minibatch = cfg['minibatch']

    corpus = _mCorpus.get_corpus(corpus_path)
    num_terms = corpus.num_terms

    # Both frameworks share the same leading arguments; the synchronous
    # one additionally receives a trailing True.
    shared_args = (corpus, k, num_terms, n_threads, minibatch, out_dir)
    if use_async:
        eta = _masynf.asyn_framework(*shared_args)
    else:
        eta = _msynf.syn_framework(*(shared_args + (True,)))

    # Persist the final eta next to the other outputs in var_path.
    _mea.write_eta(eta, os.path.join(out_dir, 'eta.final.pickle'))

    elapsed = datetime.now() - started_at
    print(elapsed)
def test():
    '''Compute and print the perplexity of a stored eta on held-out data.

    The config file path is taken from ``sys.argv[2]``.
    NOTE(review): the previous docstring called this "$1"; presumably
    ``sys.argv[1]`` carries a sub-command -- confirm against the caller.

    Config keys read: ``k``, ``test_path``, ``test_train``, ``test_test``,
    ``eta_path`` and ``gensim``.  Prints ``eta_path`` first and the value
    returned by ``_mper.perplexity`` last.
    '''
    logging.basicConfig(
        format='%(asctime)s : %(levelname)s : %(message)s',
        level=logging.INFO)

    # Load the run configuration.
    cfg = get_config(sys.argv[2])
    k = cfg['k']
    full_test_path = cfg['test_path']
    train_part_path = cfg['test_train']
    test_part_path = cfg['test_test']
    eta_path = cfg['eta_path']
    from_gensim = cfg['gensim']
    print(eta_path)

    corpus = _mCorpus.get_corpus(full_test_path)
    num_terms = corpus.num_terms

    # Collect every word id occurring in the test corpus; each document
    # yields (word id, count) pairs and the count is not needed here.
    vocab_ids = set(wid for doc in corpus for wid, _ in doc)

    if from_gensim:
        # A truthy 'gensim' flag routes loading through the gensim helper,
        # which returns both values at once.
        eta_test, eta_sum = _mea.get_gensim_eta_etaSum(eta_path, vocab_ids)
    else:
        eta = _mea.load_eta(eta_path)
        eta_test = _mea.get_eta(k, eta, vocab_ids)
        eta_sum = _mea.get_eta_sum(eta, k, num_terms)

    held_out = _mCorpus.get_corpus(test_part_path)
    observed = _mCorpus.get_corpus(train_part_path)
    alpha = _mea.get_alpha(k)

    result = _mper.perplexity(observed, held_out, alpha, eta_test, eta_sum)
    print(result)
def test():
    '''Compute and print the perplexity of a stored eta on held-out data.

    The config file path is taken from ``sys.argv[2]``.
    NOTE(review): the previous docstring called this "$1"; presumably
    ``sys.argv[1]`` carries a sub-command -- confirm against the caller.

    Config keys read: ``k``, ``test_path``, ``test_train``, ``test_test``,
    ``eta_path`` and ``gensim``.  Prints ``eta_path`` first and the value
    returned by ``_mper.perplexity`` last.
    '''
    logging.basicConfig(
        format='%(asctime)s : %(levelname)s : %(message)s',
        level=logging.INFO)

    # Load the run configuration.
    cfg = get_config(sys.argv[2])
    k = cfg['k']
    full_test_path = cfg['test_path']
    train_part_path = cfg['test_train']
    test_part_path = cfg['test_test']
    eta_path = cfg['eta_path']
    from_gensim = cfg['gensim']
    print(eta_path)

    corpus = _mCorpus.get_corpus(full_test_path)
    num_terms = corpus.num_terms

    # Collect every word id occurring in the test corpus; each document
    # yields (word id, count) pairs and the count is not needed here.
    vocab_ids = set(wid for doc in corpus for wid, _ in doc)

    if from_gensim:
        # A truthy 'gensim' flag routes loading through the gensim helper,
        # which returns both values at once.
        eta_test, eta_sum = _mea.get_gensim_eta_etaSum(eta_path, vocab_ids)
    else:
        eta = _mea.load_eta(eta_path)
        eta_test = _mea.get_eta(k, eta, vocab_ids)
        eta_sum = _mea.get_eta_sum(eta, k, num_terms)

    held_out = _mCorpus.get_corpus(test_part_path)
    observed = _mCorpus.get_corpus(train_part_path)
    alpha = _mea.get_alpha(k)

    result = _mper.perplexity(observed, held_out, alpha, eta_test, eta_sum)
    print(result)
def main():
    '''Entry point for the MPI run: rank 0 is the master, others are workers.

    Rank 0 reads the config file named by ``sys.argv[2]``, loads the
    corpus, runs ``master_process``, writes the returned eta to
    ``<var_path>/eta.final.pickle`` and prints the elapsed time.  Every
    other rank calls ``worker_process`` with its processor name.

    NOTE(review): an earlier inline note called the config path "$1";
    presumably ``sys.argv[1]`` carries a sub-command -- confirm.
    '''
    # Initializations and preliminaries.
    comm = MPI.COMM_WORLD      # MPI communicator object
    world_size = comm.size     # total number of processes (not used below)
    rank = comm.rank           # rank of this process
    status = MPI.Status()      # MPI status object
    tags = enum('READY', 'DONE', 'EXIT', 'START')

    if rank != 0:
        # Worker process.
        host = MPI.Get_processor_name()
        worker_process(comm, status, tags, host)
        return

    # Master process.
    started_at = datetime.now()
    logging.basicConfig(
        format='%(asctime)s : %(levelname)s : %(message)s',
        level=logging.INFO)

    # Load the run configuration.
    cfg = get_config(sys.argv[2])
    k = cfg['k']
    n_threads = cfg['nthread']
    mode = cfg['asyn']  # here, the value should be 'mpi'; read but unused
    corpus_path = cfg['mm_path']
    out_dir = cfg['var_path']
    minibatch = cfg['minibatch']

    corpus = _mCorpus.get_corpus(corpus_path)
    num_terms = corpus.num_terms

    eta = master_process(
        comm, status, tags, corpus, k, num_terms,
        n_threads, minibatch, out_dir)

    # Store the final pickle.
    _mea.write_eta(eta, os.path.join(out_dir, 'eta.final.pickle'))

    elapsed = datetime.now() - started_at
    print(elapsed)
def main():
    '''Entry point for the MPI run: rank 0 is the master, others are workers.

    Rank 0 reads the config file named by ``sys.argv[2]``, loads the
    corpus, runs ``master_process``, writes the returned eta to
    ``<var_path>/eta.final.pickle`` and prints the elapsed time.  Every
    other rank calls ``worker_process`` with its processor name.

    NOTE(review): an earlier inline note called the config path "$1";
    presumably ``sys.argv[1]`` carries a sub-command -- confirm.
    '''
    # Initializations and preliminaries.
    comm = MPI.COMM_WORLD      # MPI communicator object
    world_size = comm.size     # total number of processes (not used below)
    rank = comm.rank           # rank of this process
    status = MPI.Status()      # MPI status object
    tags = enum('READY', 'DONE', 'EXIT', 'START')

    if rank != 0:
        # Worker process.
        host = MPI.Get_processor_name()
        worker_process(comm, status, tags, host)
        return

    # Master process.
    started_at = datetime.now()
    logging.basicConfig(
        format='%(asctime)s : %(levelname)s : %(message)s',
        level=logging.INFO)

    # Load the run configuration.
    cfg = get_config(sys.argv[2])
    k = cfg['k']
    n_threads = cfg['nthread']
    mode = cfg['asyn']  # here, the value should be 'mpi'; read but unused
    corpus_path = cfg['mm_path']
    out_dir = cfg['var_path']
    minibatch = cfg['minibatch']

    corpus = _mCorpus.get_corpus(corpus_path)
    num_terms = corpus.num_terms

    eta = master_process(
        comm, status, tags, corpus, k, num_terms,
        n_threads, minibatch, out_dir)

    # Store the final pickle.
    _mea.write_eta(eta, os.path.join(out_dir, 'eta.final.pickle'))

    elapsed = datetime.now() - started_at
    print(elapsed)