epoch_sentence = []
print "Starting epoch %i..." % epoch
# shuffle the training examples for this epoch
permutation_index = np.random.permutation(len(train_data))
train_data_count = 0
start_time_epoch = datetime.datetime.now()
token_count = 0.0
while train_data_count <= len(permutation_index):
    batch_data = []
    start_time = datetime.datetime.now()
    for i in xrange(batch_size):
        count += 1
        index = i + train_data_count
        # wrap around when the last batch overruns the training set
        if index >= len(permutation_index):
            index %= len(permutation_index)
        batch_data.append(train_data[permutation_index[index]])
    input_ = create_input_batch(batch_data, parameters, n_tag, True, singletons)
    feed_dict_ = {}
    if parameters['char_dim']:
        # word, character (forward and reversed) and tag tensors
        assert len(input_) == 8
        feed_dict_[model.word_ids] = input_[0]
        feed_dict_[model.word_pos_ids] = input_[1]
        feed_dict_[model.char_for_ids] = input_[2]
        feed_dict_[model.char_rev_ids] = input_[3]
        feed_dict_[model.char_pos_ids] = input_[4]
        feed_dict_[model.tag_ids] = input_tag = input_[5]
        feed_dict_[model.tag_id_trans] = input_[6]
        feed_dict_[model.tag_id_index] = input_[7]
    else:
        # word-level features only
        assert len(input_) == 5
        feed_dict_[model.word_ids] = input_[0]
        feed_dict_[model.word_pos_ids] = input_[1]
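# create_input_batch is called with a `singletons` set during training. A
# common reason to pass singletons is to occasionally replace words that
# occur only once with the unknown-word id so that the <UNK> embedding gets
# trained. The helper below is a minimal, hypothetical sketch of that trick;
# the name `replace_singletons`, the 0.5 probability, and `unk_id` are
# illustrative assumptions, not this repository's actual API.
import numpy as np

def replace_singletons(word_ids, singletons, unk_id, p=0.5):
    replaced = []
    for wid in word_ids:
        if wid in singletons and np.random.uniform() < p:
            replaced.append(unk_id)  # randomly map a singleton word to <UNK>
        else:
            replaced.append(wid)
    return replaced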
continue
count = 0
assert len(test_data) == len(word_data)
while count < len(test_data):
    batch_data = []
    batch_words = []
    for i in xrange(batch_size):
        index = i + count
        if index >= len(test_data):
            break
        data = test_data[index]
        batch_data.append(data)
        batch_words.append(word_data[index])
    if len(batch_data) == 0:
        break
    input_ = create_input_batch(batch_data, parameters)
    feed_dict_ = {}
    if parameters['char_dim']:
        feed_dict_[model.word_ids] = input_[0]
        feed_dict_[model.word_pos_ids] = input_[1]
        feed_dict_[model.char_for_ids] = input_[2]
        feed_dict_[model.char_rev_ids] = input_[3]
        feed_dict_[model.char_pos_ids] = input_[4]
    else:
        feed_dict_[model.word_ids] = input_[0]
        feed_dict_[model.word_pos_ids] = input_[1]
        feed_dict_[model.pos_ids] = input_[2]
    # run the tagger and collect per-token scores for this batch
    f_scores = sess.run(f_eval, feed_dict=feed_dict_)
    # decode
    if parameters['crf']:
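# The snippet above is cut off at the CRF branch. With a CRF output layer,
# f_scores typically holds per-token emission scores, and the best tag
# sequence is recovered by Viterbi decoding over those emissions plus the
# learned tag-transition matrix. The function below is a minimal NumPy
# sketch of that step under those assumptions; it is not the decoding code
# this repository actually runs, and `transitions` is a hypothetical input.
import numpy as np

def viterbi_decode(emissions, transitions):
    """emissions: (seq_len, n_tags) scores; transitions: (n_tags, n_tags)."""
    n_steps, n_tags = emissions.shape
    score = emissions[0]
    backpointers = []
    for t in range(1, n_steps):
        # total[i, j]: best score ending in tag i at t-1, then moving to tag j
        total = score[:, None] + transitions + emissions[t][None, :]
        backpointers.append(total.argmax(axis=0))
        score = total.max(axis=0)
    best_tag = int(score.argmax())
    best_path = [best_tag]
    for bp in reversed(backpointers):
        best_tag = int(bp[best_tag])
        best_path.append(best_tag)
    return best_path[::-1]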
with tf.Session(config=config) as sess:
    sess.run(tf.global_variables_initializer())
    print_train_var()

    batch_size = parameters["batch_size"]
    n_batch = len(byte_pos_train) / batch_size  # integer division (Python 2)
    logging.info("total batch is %i", n_batch)

    for epoch in xrange(parameters["n_epoch"]):
        train_data_count = 0
        logging.info("start epoch %i", epoch)
        # shuffling disabled for now; batches are taken in file order
        # permuate_index = np.random.permutation(len(byte_pos_train))
        permuate_index = np.arange(len(byte_pos_train))
        for i in range(n_batch):
            batch_index = permuate_index[i * batch_size:(i + 1) * batch_size]
            batch_pos = [byte_pos_train[index] for index in batch_index]
            # fetch the raw examples stored at these byte offsets
            batch_data = read_random_data(parameters["train_path"], batch_pos)
            input_ = create_input_batch(batch_data)
            feed_dict_ = {}
            feed_dict_[model.sents_id] = input_[0]
            feed_dict_[model.sents_pos] = input_[1]
            feed_dict_[model.paras_pos] = input_[2]
            feed_dict_[model.labels] = input_[3]
            feed_dict_[model.max_sen_len] = max(input_[1])
            feed_dict_[model.max_para_len] = max(input_[2])
            f_score = sess.run([cost], feed_dict=feed_dict_)  # returns the batch cost
            pdb.set_trace()  # debugging breakpoint left in; remove for real training runs
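# read_random_data receives the training-file path and a list of byte
# offsets, so it presumably seeks to each stored offset and reads one
# example from there. The sketch below only illustrates that idea; the
# one-example-per-line, tab-separated format is an assumption, not the
# repository's actual on-disk format.
def read_random_data(path, byte_positions):
    batch = []
    with open(path, "rb") as f:
        for pos in byte_positions:
            f.seek(pos)  # jump to the stored byte offset
            line = f.readline().decode("utf-8").rstrip("\n")
            batch.append(line.split("\t"))  # assumed: tab-separated fields
    return batch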