Esempio n. 1
0
 # --- Incomplete fragment: start of one training epoch for a sequence-tagging
 # --- model (Python 2: print statement, xrange). The enclosing function and
 # --- epoch loop header are outside this view.
 epoch_sentence = []
 print "Starting epoch %i..." % epoch
 # Shuffle the training-example indices for this epoch.
 permutation_index = np.random.permutation(len(train_data))
 train_data_count = 0
 start_time_epoch = datetime.datetime.now()
 token_count = 0.0
 # NOTE(review): `<=` draws one extra batch after the data is exhausted; the
 # modulo wrap below keeps indices valid — confirm the off-by-one is intended.
 while train_data_count <= len(permutation_index):
     batch_data = []
     start_time = datetime.datetime.now()
     # Gather the next batch_size examples, wrapping past the end of the
     # permutation if necessary.
     for i in xrange(batch_size):
         # NOTE(review): `count` is not defined anywhere in this fragment —
         # presumably an outer-scope counter; verify in the enclosing function.
         count += 1
         index = i + train_data_count
         if index >= len(permutation_index):
             index %= len(permutation_index)  # wrap to start of permutation
         batch_data.append(train_data[permutation_index[index]])
     # Build the model's input tensors for this batch
     # (True presumably selects training mode — confirm against the helper).
     input_ = create_input_batch(batch_data, parameters, n_tag, True, singletons)
     feed_dict_ = {}
     if parameters['char_dim']:
         # Character features enabled: expect 8 input arrays
         # (word ids/positions, char fwd/rev ids + positions, tag triple).
         assert len(input_) == 8
         feed_dict_[model.word_ids] = input_[0]
         feed_dict_[model.word_pos_ids] = input_[1]
         feed_dict_[model.char_for_ids] = input_[2]
         feed_dict_[model.char_rev_ids] = input_[3]
         feed_dict_[model.char_pos_ids] = input_[4]
         # input_tag keeps a separate reference to the tag ids array.
         feed_dict_[model.tag_ids] = input_tag  = input_[5]
         feed_dict_[model.tag_id_trans] = input_[6]
         feed_dict_[model.tag_id_index] = input_[7]
     else:
         # Word-only features: expect 5 input arrays (fragment is cut off
         # before the remaining three assignments).
         assert len(input_) == 5
         feed_dict_[model.word_ids] = input_[0]
         feed_dict_[model.word_pos_ids] = input_[1]
Esempio n. 2
0
                continue
    # --- Incomplete fragment: batched evaluation loop (Python 2). The stray
    # --- `continue` above belongs to a loop whose header is outside this view.
    count = 0
    # Each test example must have a matching entry in word_data.
    assert len(test_data) == len(word_data)
    while count < len(test_data):
        batch_data = []
        batch_words = []
        # Take up to batch_size examples starting at `count`; stop early at
        # the end of the data (no wrap-around, unlike the training loop).
        for i in xrange(batch_size):
            index = i + count
            if index >= len(test_data):
                break
            # NOTE(review): `data` is assigned but never used in this
            # fragment — possibly dead code; check the full function.
            data = test_data[index]
            batch_data.append(test_data[index])
            batch_words.append(word_data[index])
        if len(batch_data) <= 0:
            break  # no examples left
        # Build inference-time inputs (no tags/singletons arguments here).
        input_ = create_input_batch(batch_data, parameters)
        feed_dict_ = {}
        if parameters['char_dim']:
            # Character features enabled: word + char inputs, no tag inputs.
            feed_dict_[model.word_ids] = input_[0]
            feed_dict_[model.word_pos_ids] = input_[1]
            feed_dict_[model.char_for_ids] = input_[2]
            feed_dict_[model.char_rev_ids] = input_[3]
            feed_dict_[model.char_pos_ids] = input_[4]
        else:
            feed_dict_[model.word_ids] = input_[0]
            feed_dict_[model.word_pos_ids] = input_[1]
            feed_dict_[model.pos_ids] = input_[2]

        # Run the evaluation op for this batch.
        # NOTE(review): `count` is never advanced in the visible span —
        # presumably incremented later in the loop; otherwise this loops forever.
        f_scores = sess.run(f_eval, feed_dict=feed_dict_)
        # Decode (fragment cut off before the CRF branch body).
        if parameters['crf']:
Esempio n. 3
0
    # --- Incomplete fragment: TF1-style training driver (Python 2). The
    # --- enclosing function header is outside this view.
    with tf.Session(config=config) as sess:
        sess.run(tf.global_variables_initializer())
        print_train_var()
        batch_size = parameters["batch_size"]
        # Python 2 `/` on ints is floor division, so any trailing partial
        # batch is dropped.
        n_batch = len(byte_pos_train) / batch_size
        logging.info("total batch is %i", n_batch)
        for epoch in xrange(parameters["n_epoch"]):
            train_data_count = 0
            logging.info("start epoch %i", epoch)
            # NOTE(review): shuffling is commented out — batches are taken in
            # a fixed sequential order every epoch; confirm this is intended.
            # permuate_index = np.random.permutation(len(byte_pos_train))
            permuate_index = np.arange(len(byte_pos_train))
            # pdb.set_trace()
            for i in range(n_batch):
                # File offsets of the i-th batch's examples.
                batch_index = permuate_index[i * batch_size:(i + 1) *
                                             batch_size]
                batch_pos = [byte_pos_train[index] for index in batch_index]
                # Read the raw examples from the training file at those offsets.
                batch_data = read_random_data(parameters["train_path"],
                                              batch_pos)
                input_ = create_input_batch(batch_data)
                # pdb.set_trace()
                # input_: [sents_id, sents_pos, paras_pos, labels] — the max
                # lengths fed below are computed from the position arrays.
                feed_dict_ = {}
                feed_dict_[model.sents_id] = input_[0]
                feed_dict_[model.sents_pos] = input_[1]
                feed_dict_[model.paras_pos] = input_[2]
                feed_dict_[model.labels] = input_[3]
                feed_dict_[model.max_sen_len] = max(input_[1])
                feed_dict_[model.max_para_len] = max(input_[2])
                # pdb.set_trace()
                # NOTE(review): despite the name, this runs `cost` only — no
                # train op in the fetch list, so no weights are updated here;
                # verify against the rest of the function.
                f_score = sess.run([cost], feed_dict=feed_dict_)
                # NOTE(review): live debugger breakpoint — halts on every
                # batch; remove before any non-interactive run.
                pdb.set_trace()