Esempio n. 1
0
 def load(cls, filename, lm, config):
   """Load a rule table from ``filename`` and log how long loading took.

   NOTE(review): takes ``cls`` as first argument, so this is presumably
   decorated with ``@classmethod`` outside this view -- confirm.

   Args:
     filename: Rule table location; logged and passed to ``__load_rules``.
     lm: Forwarded unchanged to ``__load_rules``.
     config: Forwarded unchanged to ``__load_rules``.

   Returns:
     Whatever ``cls.__load_rules(filename, lm, config)`` returns.
   """
   # Pass values as logging arguments instead of pre-formatting with `%`:
   # the message is only built when the record is actually emitted, and a
   # tuple-valued filename can no longer break the format operation.
   logger.info('Loading rule table from "%s"...', filename)
   timer = Timer()
   timer.tic()
   table = cls.__load_rules(filename, lm, config)
   logger.info('Rule table loaded in %f seconds.', timer.toc())
   return table
Esempio n. 2
0
 def load(cls, filename, lm, config):
     """Load a rule table from ``filename`` and log how long loading took.

     NOTE(review): takes ``cls`` as first argument, so this is presumably
     decorated with ``@classmethod`` outside this view -- confirm.

     Args:
         filename: Rule table location; logged and passed to
             ``__load_rules``.
         lm: Forwarded unchanged to ``__load_rules``.
         config: Forwarded unchanged to ``__load_rules``.

     Returns:
         Whatever ``cls.__load_rules(filename, lm, config)`` returns.
     """
     # Pass values as logging arguments instead of pre-formatting with `%`:
     # the message is only built when the record is actually emitted, and a
     # tuple-valued filename can no longer break the format operation.
     logger.info('Loading rule table from "%s"...', filename)
     timer = Timer()
     timer.tic()
     table = cls.__load_rules(filename, lm, config)
     logger.info('Rule table loaded in %f seconds.', timer.toc())
     return table
Esempio n. 3
0
    print >> stderr, 'lambda_reg: %20.18f' % lambda_reg
    print >> stderr, 'Max iterations: %d' % maxiter
    if _seed:
      print >> stderr, 'Random seed: %s' % _seed
    print >> stderr, ''
    
    print >> stderr, 'load Word2Vec Model...'

    word_vectors = WordVectors.load_vectors(word_vector_file)
    embsize = word_vectors.embsize()
       
    print >> stderr, 'preparing data...' 
    instances, _, total_internal_node = prepare_data(word_vectors, instances_file)
    
    print >> stderr, 'init. RAE parameters...'
    timer = Timer()
    timer.tic()
    if _seed != None:
      _seed = int(_seed)
    else:
      _seed = None
    print >> stderr, 'seed: %s' % str(_seed)

    theta0 = init_theta(embsize, _seed=_seed)
    theta0_init_time = timer.toc()
    print >> stderr, 'shape of theta0 %s' % theta0.shape
    timer.tic()
    if save_theta0:
      print >> stderr, 'saving theta0...'
      pos = model.rfind('.')
      if pos < 0:
Esempio n. 4
0
        print >> stderr, 'lambda_reg: %20.18f' % lambda_reg
        print >> stderr, 'Max iterations: %d' % maxiter
        if _seed:
            print >> stderr, 'Random seed: %s' % _seed
        print >> stderr, ''

        print >> stderr, 'load word vectors...'
        word_vectors = WordVectors.load_vectors(word_vector_file)
        embsize = word_vectors.embsize()

        print >> stderr, 'preparing data...'
        instances, _, total_internal_node = prepare_data(
            word_vectors, instances_file)

        print >> stderr, 'init. RAE parameters...'
        timer = Timer()
        timer.tic()
        if _seed != None:
            _seed = int(_seed)
        else:
            _seed = None
        print >> stderr, 'seed: %s' % str(_seed)

        theta0 = init_theta(embsize, _seed=_seed)
        theta0_init_time = timer.toc()
        print >> stderr, 'shape of theta0 %s' % theta0.shape
        timer.tic()
        if save_theta0:
            print >> stderr, 'saving theta0...'
            pos = model.rfind('.')
            if pos < 0:
Esempio n. 5
0
                         rule_table,
                         lm,
                         recombination_checker=recombination_checker,
                         extra_feature_funcs=extra_feature_funcs,
                         checking_hypo=checking,
                         expend_loser=expend_loser)

    logger.info('Start decoding...')

    def translate(data):
        """Translate one ``(sentence id, tokens)`` work item.

        The id is unpacked but not used; the tokens are handed to the
        enclosing scope's ``decoder`` together with ``k``, ``drop_oov`` and
        ``with_rule_tree``, and the decoder's result is returned as-is.
        """
        _sid, tokens = data
        return decoder.translate(tokens, k, drop_oov, with_rule_tree)

    total_timer = Timer()
    total_timer.tic()
    timer = Timer()
    sentences = [
        re.sub(r'\s+', ' ', sentence).strip().split(' ') for sentence in source
    ]
    data = [(sid, sentences[sid]) for sid in range(len(sentences))]
    if threads > 1:
        pool = Pool(threads, init_worker)
        all_translations = []
        try:
            all_translations = pool.map(translate, data)
            pool.close()
            pool.join()
        except KeyboardInterrupt:
            logger.critical('Caught KeyboardInterrupt, terminating workers')
 
 extra_feature_funcs = build_extra_feature_funcs(config)
 recombination_checker = CombinedRecombinationChecker(extra_feature_funcs)
 decoder = CKYDecoder(config, rule_table, lm, 
                      recombination_checker=recombination_checker,
                      extra_feature_funcs=extra_feature_funcs,
                      checking_hypo=checking, expend_loser=expend_loser)
 
 logger.info('Start decoding...')
 def translate(data):
   """Translate one ``(sentence id, tokens)`` work item.

   The id is unpacked but not used; the tokens are handed to the enclosing
   scope's ``decoder`` together with ``k``, ``drop_oov`` and
   ``with_rule_tree``, and the decoder's result is returned as-is.
   """
   _sid, tokens = data
   return decoder.translate(tokens, k, drop_oov, with_rule_tree)
 
 total_timer = Timer()
 total_timer.tic()
 timer = Timer()
 sentences = [re.sub(r'\s+', ' ', sentence).strip().split(' ') for
              sentence in source]
 data = [(sid, sentences[sid]) for sid in range(len(sentences))]
 if threads > 1:
   pool = Pool(threads, init_worker)
   all_translations = []
   try:
     all_translations = pool.map(translate, data)
     pool.close()
     pool.join()
   except KeyboardInterrupt:
     logger.critical('Caught KeyboardInterrupt, terminating workers')
     pool.terminate()
Esempio n. 7
0
            print >> stderr, 'Random seed: %s' % _seed
        print >> stderr, ''
    
        print >> stderr, 'load word vectors...'
        # Load the word-vector input into word_vectors
        word_vectors = WordVectors.load_vectors( word_vector_file )
        Xidx = [word_vectors.get_word_index( '$X_1' ), word_vectors.get_word_index( '$X_2' )]
        # embsize is the dimensionality of the word vectors
        embsize = word_vectors.embsize()

        print >> stderr, 'preparing data...' 
        # Load the training phrase data; the phrases are converted into an array of instances stored in instances
        instances, _, total_internal_node = prepare_data( word_vectors, instances_file )
 
        print >> stderr, 'init. RAE parameters...'
        timer = Timer()
        timer.tic()
        if _seed != None:
            _seed = int(_seed)
        else:
            _seed = None
        print >> stderr, 'seed: %s' % str(_seed)

        # Initialize the parameters
        theta0 = init_theta( embsize, word_vectors, _seed = _seed )
        theta0_init_time = timer.toc()
        print >> stderr, 'shape of ' + tp + ' theta0 %s' % theta0.shape
        timer.tic()
        if save_theta0:
            print >> stderr, 'saving ' + tp + ' theta0...'
            pos = model.rfind('.')