def load(cls, filename, lm, config):
    """Load a rule table from ``filename`` and log how long it took.

    The loading itself is delegated to ``cls.__load_rules``; this wrapper
    only adds progress logging and timing around that call.

    Args:
        filename: Identifies the rule table to load; passed to
            ``cls.__load_rules`` and echoed in the log message.
        lm: Forwarded unchanged to ``cls.__load_rules`` (presumably the
            language model -- confirm against the caller).
        config: Forwarded unchanged to ``cls.__load_rules``.

    Returns:
        Whatever ``cls.__load_rules`` returns for these arguments.
    """
    logger.info('Loading rule table from "%s"...' % filename)

    # The timer is started after the first log call and read before the
    # second one, so it brackets only the delegated load.
    stopwatch = Timer()
    stopwatch.tic()
    rule_table = cls.__load_rules(filename, lm, config)
    elapsed_seconds = stopwatch.toc()

    logger.info('Rule table loaded in %f seconds.' % elapsed_seconds)
    return rule_table
print >> stderr, 'lambda_reg: %20.18f' % lambda_reg print >> stderr, 'Max iterations: %d' % maxiter if _seed: print >> stderr, 'Random seed: %s' % _seed print >> stderr, '' print >> stderr, 'load Word2Vec Model...' word_vectors = WordVectors.load_vectors(word_vector_file) embsize = word_vectors.embsize() print >> stderr, 'preparing data...' instances, _, total_internal_node = prepare_data(word_vectors, instances_file) print >> stderr, 'init. RAE parameters...' timer = Timer() timer.tic() if _seed != None: _seed = int(_seed) else: _seed = None print >> stderr, 'seed: %s' % str(_seed) theta0 = init_theta(embsize, _seed=_seed) theta0_init_time = timer.toc() print >> stderr, 'shape of theta0 %s' % theta0.shape timer.tic() if save_theta0: print >> stderr, 'saving theta0...' pos = model.rfind('.') if pos < 0:
print >> stderr, 'lambda_reg: %20.18f' % lambda_reg print >> stderr, 'Max iterations: %d' % maxiter if _seed: print >> stderr, 'Random seed: %s' % _seed print >> stderr, '' print >> stderr, 'load word vectors...' word_vectors = WordVectors.load_vectors(word_vector_file) embsize = word_vectors.embsize() print >> stderr, 'preparing data...' instances, _, total_internal_node = prepare_data( word_vectors, instances_file) print >> stderr, 'init. RAE parameters...' timer = Timer() timer.tic() if _seed != None: _seed = int(_seed) else: _seed = None print >> stderr, 'seed: %s' % str(_seed) theta0 = init_theta(embsize, _seed=_seed) theta0_init_time = timer.toc() print >> stderr, 'shape of theta0 %s' % theta0.shape timer.tic() if save_theta0: print >> stderr, 'saving theta0...' pos = model.rfind('.') if pos < 0:
rule_table, lm, recombination_checker=recombination_checker, extra_feature_funcs=extra_feature_funcs, checking_hypo=checking, expend_loser=expend_loser) logger.info('Start decoding...') def translate(data): _, sentence = data translations = decoder.translate(sentence, k, drop_oov, with_rule_tree) return translations total_timer = Timer() total_timer.tic() timer = Timer() sentences = [ re.sub(r'\s+', ' ', sentence).strip().split(' ') for sentence in source ] data = [(sid, sentences[sid]) for sid in range(len(sentences))] if threads > 1: pool = Pool(threads, init_worker) all_translations = [] try: all_translations = pool.map(translate, data) pool.close() pool.join() except KeyboardInterrupt: logger.critical('Caught KeyboardInterrupt, terminating workers')
extra_feature_funcs = build_extra_feature_funcs(config) recombination_checker = CombinedRecombinationChecker(extra_feature_funcs) decoder = CKYDecoder(config, rule_table, lm, recombination_checker=recombination_checker, extra_feature_funcs=extra_feature_funcs, checking_hypo=checking, expend_loser=expend_loser) logger.info('Start decoding...') def translate(data): _, sentence = data translations = decoder.translate(sentence, k, drop_oov, with_rule_tree) return translations total_timer = Timer() total_timer.tic() timer = Timer() sentences = [re.sub(r'\s+', ' ', sentence).strip().split(' ') for sentence in source] data = [(sid, sentences[sid]) for sid in range(len(sentences))] if threads > 1: pool = Pool(threads, init_worker) all_translations = [] try: all_translations = pool.map(translate, data) pool.close() pool.join() except KeyboardInterrupt: logger.critical('Caught KeyboardInterrupt, terminating workers') pool.terminate()
print >> stderr, 'Random seed: %s' % _seed print >> stderr, '' print >> stderr, 'load word vectors...' # 载入词向量的输入放入word_vectors中 word_vectors = WordVectors.load_vectors( word_vector_file ) Xidx = [word_vectors.get_word_index( '$X_1' ), word_vectors.get_word_index( '$X_2' )] #embsize为词向量的维度 embsize = word_vectors.embsize() print >> stderr, 'preparing data...' #载入训练短语数据,将短语转化为instance的数组放入instances中 instances, _, total_internal_node = prepare_data( word_vectors, instances_file ) print >> stderr, 'init. RAE parameters...' timer = Timer() timer.tic() if _seed != None: _seed = int(_seed) else: _seed = None print >> stderr, 'seed: %s' % str(_seed) # 初始化参数 theta0 = init_theta( embsize, word_vectors, _seed = _seed ) theta0_init_time = timer.toc() print >> stderr, 'shape of ' + tp + ' theta0 %s' % theta0.shape timer.tic() if save_theta0: print >> stderr, 'saving ' + tp + ' theta0...' pos = model.rfind('.')