コード例 #1
0
 parser.add_argument('output', help='output file')
 options = parser.parse_args()
 
 phrases_file = options.phrases
 word_vector_file = options.word_vector_file
 theta_file = options.theta
 output_file = options.output
 
 print >> stderr, 'load word vectors...'
 word_vectors = WordVectors.load_vectors(word_vector_file)
 embsize = word_vectors.embsize()
 # print "embsize: ",embsize
 # print "theta file: ",theta_file
 
 print >> stderr, 'load RAE parameters...'
 theta = unpickle(theta_file)
 rae = RecursiveAutoencoder.build(theta, embsize)
   
 total_cost = 0
 total_instance_num = 0
 total_internal_node_num = 0
 
 print '='*63
 print '%20s %20s %20s' % ('all', 'avg/node', 'internal node')
 print '-'*63
 
 with Reader(phrases_file) as reader, Writer(output_file) as writer:
   for phrase in reader:
     instance = Instance.parse_from_str(phrase, word_vectors)
     words_embedded = word_vectors[instance.words]
     root_node, cost = rae.forward(words_embedded)
コード例 #2
0
ファイル: BRAE_Trainer2_L.py プロジェクト: zhu-y11/phrase2vec
        print >> stderr, 'lambda_reg_sem: %20.18f' % lambda_reg_sem
        print >> stderr, 'alpha: %20.18f' % alpha
        print >> stderr, 'Max iterations_la: %d' % maxiter_la
        if _seed:
            print >> stderr, 'Random seed: %s' % _seed
        print >> stderr, ''

        print >> stderr, 'load word vectors...'
        # Load the source- and target-side word vectors into src_word_vectors / trg_word_vectors
        src_word_vectors = WordVectors.load_vectors( src_word_vector_file )
        trg_word_vectors = WordVectors.load_vectors( trg_word_vector_file )
        # src_embsize / trg_embsize: dimensionality of the source / target word vectors
        src_embsize = src_word_vectors.embsize()
        trg_embsize = trg_word_vectors.embsize()
        
        src_theta_opt = unpickle( src_theta_file )
        trg_theta_opt = unpickle( trg_theta_file )
        theta_opt = []
        theta_opt.extend( src_theta_opt )
        theta_opt.extend( trg_theta_opt )

        src_instances, _, src_total_internal_node, bad_src_instances,\
        trg_instances, _, trg_total_internal_node, bad_trg_instances\
                                = prepare_data_la( src_word_vectors, src_instances_file,\
                                                trg_word_vectors, trg_instances_file )

        timer = Timer()
        timer.tic()
        if _seed != None:
            _seed = int(_seed)
        else:
コード例 #3
0
ファイル: rae.py プロジェクト: wanglinjie/theano_exercises
    parser.add_argument('word_vector_file', help='word vector file')
    parser.add_argument('theta', help='RAE parameter file (pickled)')
    parser.add_argument('output', help='output file')
    options = parser.parse_args()

    phrases_file = options.phrases
    word_vector_file = options.word_vector_file
    theta_file = options.theta
    output_file = options.output

    print >> stderr, 'load word vectors...'
    word_vectors = WordVectors.load_vectors(word_vector_file)
    embsize = word_vectors.embsize()

    print >> stderr, 'load RAE parameters...'
    theta = unpickle(theta_file)
    rae = RecursiveAutoencoder.build(theta, embsize)

    total_cost = 0
    total_instance_num = 0
    total_internal_node_num = 0

    print '=' * 63
    print '%20s %20s %20s' % ('all', 'avg/node', 'internal node')
    print '-' * 63

    with Reader(phrases_file) as reader, Writer(output_file) as writer:
        for phrase in reader:
            instance = Instance.parse_from_str(phrase, word_vectors)
            words_embedded = word_vectors[instance.words]
            root_node, cost = rae.forward(words_embedded)