def loadandexport(root, name, out, dictionary=None):
    cfg = load_config().cfg
    with open(ops.join(root, name), 'r') as data, tf.python_io.TFRecordWriter(out) as writer:
        info = np.array([tmp.strip().split() for tmp in data.readlines()])
        index = 0
        sys.stdout.write('>>Writing TFRecords for parameters: (l: {}, w: {}, h: {})\n'.format(
            cfg.ARCH.SEQ_LENGTH, cfg.ARCH.INPUT_SIZE[0], cfg.ARCH.INPUT_SIZE[1]))
        for entry in info:
            index += 1
            image = cv2.imread(ops.join(root, entry[0]), cv2.IMREAD_COLOR)
            if image is not None:
                image_org = cv2.resize(image, (cfg.ARCH.INPUT_SIZE[0], cfg.ARCH.INPUT_SIZE[1]))
                filename = ops.basename(entry[0])
                if dictionary is not None:
                    label = unidecode(dictionary[int(entry[1])][0])
                else:
                    label = unidecode(entry[1])
                if len(label) <= cfg.ARCH.SEQ_LENGTH:
                    label_encoded = [char_to_int(char) for char in label]
                    features = tf.train.Features(feature={
                        'labels': int64_feature(label_encoded),
                        'images': bytes_feature(bytes(list(np.reshape(
                            image_org, [cfg.ARCH.INPUT_SIZE[0] * cfg.ARCH.INPUT_SIZE[1] * 3])))),
                        'imagenames': bytes_feature(filename)
                    })
                    example = tf.train.Example(features=features)
                    writer.write(example.SerializeToString())
                if index >= 1000:
                    break
                sys.stdout.write('\r>>Writing {:d}/{:d} ({:s}) to tfrecords'.format(
                    index, len(info), filename))
                sys.stdout.flush()
        sys.stdout.write('\n')
        sys.stdout.flush()

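# A minimal usage sketch, assuming an annotation file 'sample.txt' (one
# "image_path label" pair per line) and an output path of our choosing;
# both names are assumptions, not files shipped with the project.
loadandexport(root='data/test', name='sample.txt',
              out='data/test_feature.tfrecords')
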
def evaluate_one():
    config = load_config(FLAGS.config_file)
    logger = get_logger(FLAGS.log_file)
    # limit GPU memory growth
    tf_config = tf.ConfigProto()
    tf_config.gpu_options.allow_growth = True
    with open(FLAGS.map_file, "rb") as f:
        char_to_id, id_to_char, target_to_id, id_to_target, feature_to_id, id_to_feature = pickle.load(f)
    with tf.Session(config=tf_config) as sess:
        model = create_model(sess, BiLSTMModel, config, load_word2vec, id_to_char, logger)
        while True:
            try:
                line = input("请输入测试句子: ")  # "Please enter a test sentence: "
                if line == "exit":
                    exit(0)
                features = dict()
                if config["use_other_features"]:
                    for feature_i in config["features"]:
                        if feature_i == "0":
                            continue
                        features[feature_i] = input("请输入 feature_" + feature_i + " : ").split()
                result = model.evaluate_one(
                    sess, input_from_line(line, features, char_to_id, feature_to_id), id_to_target)
                print(result)
            except Exception as e:
                print(e)
                logger.info(e)

def main_test():
    id_to_word, word_to_id = read_vocab(FLAGS.vocab_dir)
    id_to_cat, cat_to_id = read_category(FLAGS.category_dir)
    config = load_config(FLAGS.config_file)
    model = TextRNN(config)
    test(model, config, word_to_id, cat_to_id, id_to_cat)

def main(url='', max_workers=10):
    work_path = os.getcwd()
    config = load_config(
        url, os.path.join(work_path, 'configs', 'url_mapping.json'))
    mode_name = config['mode_name']
    # progress_display must expose a `total` attribute and an `update` method
    progress_display = tqdm()
    book = Book(
        url,
        config,
        set_total=lambda x: set_total(progress_display, x),
        update=lambda x: update(progress_display, x),
    )
    book.load_catalog()
    res = False
    try:
        res = book.download()
    except KeyboardInterrupt:
        book.terminate()
    if res:
        progress_display.close()
        # nd.make_book()
        print("全部下载完毕")  # "All chapters downloaded"
    else:
        progress_display.close()
    book.save(os.path.join(os.getcwd(), 'novels'),
              book.information['book_name'] + '-' + mode_name + '.txt')

def get_info(url, max_workers=10):
    work_path = os.getcwd()
    config = load_config(
        url, os.path.join(work_path, 'configs', 'url_mapping.json'))
    book = Book(url, config)
    book.load_catalog()
    print(book.information)

def init_args() -> Tuple[argparse.Namespace, EasyDict]:
    """
    :return: parsed arguments and (updated) config.cfg object
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-d', '--dataset_dir', type=str,
                        help='Directory containing test_features.tfrecords')
    parser.add_argument('-c', '--chardict_dir', type=str,
                        help='Directory where character dictionaries for the dataset were stored')
    parser.add_argument('-w', '--weights_path', type=str, required=True,
                        help='Path to pre-trained weights')
    parser.add_argument('-n', '--num_classes', type=int, required=True,
                        help='Force number of character classes to this number. '
                             'Use 37 to run with the demo data. '
                             'Set to 0 for auto (read from files in charset_dir)')
    parser.add_argument('-f', '--config_file', type=str,
                        help='Use this global configuration file')
    parser.add_argument('-v', '--visualize', type=bool, default=False,
                        help='Whether to display images')
    parser.add_argument('-b', '--one_batch', default=False, action='store_true',
                        help='Test only one batch of the dataset')
    parser.add_argument('-j', '--num_threads', type=int, default=int(os.cpu_count() / 2),
                        help='Number of threads to use in batch shuffling')

    args = parser.parse_args()

    config = load_config(args.config_file)
    if args.dataset_dir:
        config.cfg.PATH.TFRECORDS_DIR = args.dataset_dir
    if args.chardict_dir:
        config.cfg.PATH.CHAR_DICT_DIR = args.chardict_dir

    return args, config.cfg

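# A minimal sketch of how the parsed arguments and merged config might be
# consumed; the entry point below is an assumption, not part of the project.
if __name__ == '__main__':
    parsed_args, parsed_cfg = init_args()
    print('Weights path: {:s}'.format(parsed_args.weights_path))
    print('TFRecords dir: {:s}'.format(parsed_cfg.PATH.TFRECORDS_DIR))
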
def main_train():
    # load data sets
    # sentences = [[(word11, tag11), ...], [(word21, tag21), ...], ...]
    train_sentences = load_sentences(FLAGS.train_file, FLAGS.lower, FLAGS.zeros)
    dev_sentences = load_sentences(FLAGS.dev_file, FLAGS.lower, FLAGS.zeros)
    test_sentences = load_sentences(FLAGS.test_file, FLAGS.lower, FLAGS.zeros)

    # Use the selected tagging scheme (IOB / IOBES);
    # train_sentences and test_sentences are updated in place
    update_tag_scheme(train_sentences, FLAGS.tag_schema)
    update_tag_scheme(test_sentences, FLAGS.tag_schema)

    # Create or load the char / tag / feature / target mappings
    if not os.path.isfile(FLAGS.map_file):
        # build the character dictionary; if a pre-trained embedding file
        # exists, use both the pre-trained vocabulary and the training set
        if FLAGS.pre_emb:
            # count characters in the training set and return a frequency dict
            dico_chars_train = char_mapping(train_sentences, FLAGS.lower)[0]
            # enlarge the training-set dictionary with pre-trained characters
            dico_chars, char_to_id, id_to_char = augment_with_pretrained(
                dico_chars_train.copy(),
                FLAGS.emb_file,
                list(itertools.chain.from_iterable(
                    [[w[0] for w in s] for s in test_sentences])
                )
            )
        # otherwise build the dictionary from the training set only
        else:
            _c, char_to_id, id_to_char = char_mapping(train_sentences, FLAGS.lower)

        # Create a dictionary and a mapping for tags
        _t, target_to_id, id_to_target = tag_mapping(train_sentences)
        # Create mappings for the other features; all three return values are dicts
        _f, feature_to_id, id_to_feature = feature_mapping(train_sentences, FLAGS.features)

        # Persist the char / target / feature mappings
        with open(FLAGS.map_file, "wb") as f:
            pickle.dump([char_to_id, id_to_char, target_to_id, id_to_target,
                         feature_to_id, id_to_feature], f)
    else:
        with open(FLAGS.map_file, "rb") as f:
            char_to_id, id_to_char, target_to_id, id_to_target, feature_to_id, id_to_feature = pickle.load(f)

    # make paths for storing logs and the model config if they do not exist
    make_path(FLAGS)
    if os.path.isfile(FLAGS.config_file):
        config = load_config(FLAGS.config_file)
    else:
        config = create_config_model(FLAGS, char_to_id, target_to_id, feature_to_id)
    logger = get_logger(FLAGS.log_file)
    print_config(config, logger)

    train(config, train_sentences, dev_sentences, test_sentences, char_to_id,
          feature_to_id, target_to_id, id_to_char, id_to_target, logger)

def save_rnn_for_java():
    config = load_config(FLAGS.config_file)
    model = TextRNN(config)
    with tf.Session() as session:
        session.run(tf.global_variables_initializer())
        saver = tf.train.Saver()
        # restore the saved checkpoint
        saver.restore(sess=session, save_path=FLAGS.save_path)
        builder = tf.saved_model.builder.SavedModelBuilder("tmp/rnn_model")
        builder.add_meta_graph_and_variables(
            session, [tf.saved_model.tag_constants.SERVING])
        builder.save()

def search(book_name: str, website="全部", config=None):
    if config is not None:
        configs = config
    else:
        configs = load_config(None, None,
                              not_mapping=os.path.join(os.getcwd(), 'configs', 'search_rules.json'))
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN; rv:1.9.1.6) ",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
        "Accept-Language": "en-us",
        "Connection": "keep-alive",
        "Accept-Charset": "GB2312,utf-8;q=0.7,*;q=0.7",
        "Content-Type": "application/x-www-form-urlencoded"
    }
    results = []
    if website != "全部":  # "全部" means: search all configured sites
        configs = {website: configs[website]}
    for website_url in configs:
        print("start")
        config = configs[website_url]
        if config['status'] == 'closed':
            continue
        if config['method'] == 'POST':
            config['key'][config['book_name_in_key']] = book_name
            try:
                response = requests.post(
                    url=config['target_url'],
                    headers=headers,
                    data=urllib.parse.urlencode(
                        config['key'], encoding=config['request_encoding']))
            except requests.exceptions.ConnectionError:
                print("连接失败")  # connection failed
                continue
            response.encoding = config['encoding']
            bs = BeautifulSoup(response.text, "html5lib")
            authors = bs.select(config['author'])[1:]
            prefix = make_prefix(config['prefix'], config['url_prefix'])
            results += [{
                'href': prefix + i['href'],
                'book_name': i.string,
                'author': j.string,
                "from": website_url
            } for i, j in zip(bs.select(config['book_name']), authors)]
    return results

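# A minimal usage sketch (the book title is an arbitrary example): query every
# configured site and print whatever matches come back.
for hit in search('三体'):
    print(hit['book_name'], hit['author'], hit['href'])
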
def main_train():
    # build the vocabulary if it does not exist yet
    if not os.path.exists(FLAGS.vocab_dir):
        build_vocab(FLAGS.train_dir, FLAGS.vocab_dir, FLAGS.vocab_size)
    id_to_word, word_to_id = read_vocab(FLAGS.vocab_dir)
    if not os.path.exists(FLAGS.category_dir):
        build_category(FLAGS.train_dir, FLAGS.category_dir)
    id_to_cat, cat_to_id = read_category(FLAGS.category_dir)
    if os.path.isfile(FLAGS.config_file):
        config = load_config(FLAGS.config_file)
    else:
        config = create_rnn_config_model(FLAGS, id_to_word)
    model = TextRNN(config)
    train(model, config, word_to_id, cat_to_id)

def evaluate_one(Model, config_file):
    """
    Load the model and run prediction on single samples interactively.
    :param Model: model class to instantiate
    :param config_file: path to the configuration file
    :return:
    """
    config = load_config(config_file)
    id_to_word, word_to_id = read_vocab(config.vocab_dir)
    id_to_cat, cat_to_id = read_category(config.category_dir)
    model = Model(config)
    with tf.Session() as session:
        session.run(tf.global_variables_initializer())
        saver = tf.train.Saver()
        # restore the saved checkpoint
        saver.restore(
            sess=session, save_path="checkpoints/textcnn/best_validation")
        while True:
            try:
                line = input("请输入测试文本: ")  # "Please enter test text: "
                if line == "exit":
                    exit(0)
                data = [
                    word_to_id[x] if x in word_to_id else word_to_id["<UNK>"]
                    for x in line
                ]
                pad_data = kr.preprocessing.sequence.pad_sequences(
                    [data], config.seq_length, padding="post", truncating="post")
                print(pad_data)
                feed_dict = {model.input_x: pad_data, model.keep_prob: 1.0}
                y_pred_cls, logits = session.run(
                    [model.y_pred_cls, model.logits], feed_dict=feed_dict)
                print(y_pred_cls[0], tf.nn.softmax(logits).eval())
                print("所属类别: {}".format(id_to_cat[y_pred_cls[0]]))  # predicted category
            except Exception as e:
                print(e)

def fetch_info(self):
    self.c.select_button_text_signal.emit("稍后")  # button label: "please wait"
    url = self.url_input.text()
    work_path = os.getcwd()
    config = load_config(
        url, os.path.join(work_path, 'configs', 'url_mapping.json'))
    book = Book(
        url,
        config,
    )
    try:
        book.load_catalog()
    except requests.exceptions.ReadTimeout as rt:
        print("获取超时")  # request timed out
    self.c.book_name_signal.emit(book.information['book_name'])
    self.c.author_signal.emit(book.information['author'])
    self.c.status_signal.emit(book.information['status'])
    self.c.update_date_signal.emit(book.information['update_date'])
    self.c.mode_signal.emit(config['mode_name'])
    self.c.links_num_signal.emit(len(book.chapters))
    self.c.links_signal.emit(book.chapters)
    self.c.select_button_text_signal.emit("查询")  # reset button label: "search"

def download(self):
    print('start downloading')
    url = self.url_input.text()
    work_path = os.getcwd()
    config = load_config(
        url, os.path.join(work_path, 'configs', 'url_mapping.json'))
    book = Book(
        url,
        config,
        set_total=lambda x: total(self.c.download_total_signal, x),
        update=lambda x: update(self.c.download_num_signal, x),
    )
    try:
        book.load_catalog()
    except requests.exceptions.ReadTimeout as rt:
        print("获取超时")  # request timed out
    if book.download():
        print("全部下载完毕")  # all chapters downloaded
    else:
        pass
    book.save(
        os.path.join(os.getcwd(), 'novels'),
        book.information['book_name'] + '-' + config['mode_name'] + '.txt')

def read_features(tfrecords_path, batch_size: int, num_threads: int) -> Tuple[tf.Tensor, tf.Tensor, tf.Tensor]:
    """
    :param tfrecords_path: Path to the tfrecords file.
    :param batch_size: The size of a batch.
    :param num_threads: The number of threads to load and shuffle.
    :return: input_images, input_labels, input_image_names
    """
    assert ops.exists(tfrecords_path), "tfrecords file not found: %s" % tfrecords_path

    # Load configuration
    cfg = load_config().cfg

    def extract_batch(x):
        return TextFeatureReader.extract_features_batch(x, cfg.ARCH.INPUT_SIZE, 3)

    dataset = tf.data.TFRecordDataset(tfrecords_path)
    dataset = dataset.batch(cfg.TRAIN.BATCH_SIZE, drop_remainder=True)
    dataset = dataset.map(extract_batch, num_parallel_calls=num_threads)
    dataset = dataset.apply(tf.contrib.data.shuffle_and_repeat(batch_size * num_threads))
    dataset = dataset.prefetch(buffer_size=batch_size * num_threads)

    iterator = dataset.make_one_shot_iterator()
    input_images, input_labels, input_image_names = iterator.get_next()
    return input_images, input_labels, input_image_names

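# A minimal sketch of consuming the pipeline in a TF1 session; the tfrecords
# path below is an example, not a file guaranteed to exist.
demo_images, demo_labels, demo_names = read_features(
    'data/test_feature.tfrecords', batch_size=32, num_threads=4)
with tf.Session() as demo_sess:
    batch_images, batch_labels, batch_names = demo_sess.run(
        [demo_images, demo_labels, demo_names])
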
#!/usr/bin/env python2
import argparse
import os

from utils import config_utils, os1_utils

# The nosetests command to run the integration tests
NOSETESTS_COMMAND = 'cd pulp-automation && nosetests -vs --with-xunit'

# Setup the CLI
description = 'Run integration tests using an environment deployed by deploy-environment.py'
parser = argparse.ArgumentParser(description=description)
parser.add_argument(
    '--config',
    help='path to the configuration file produced by deploy-environment.py',
    required=True)
args = parser.parse_args()

config = config_utils.load_config(args.config)

print 'Authenticating with OS1...'
os1_auth = config.get(config_utils.CONFIG_OS1_CREDENTIALS, {})
os1 = os1_utils.OS1Manager(**os1_auth)

print 'Tearing down instances...'
os1.teardown_instances(config)
os.remove(args.config)
print 'Done!'

def train_shadownet(dataset_dir, weights_path=None, decode: bool = False, num_threads=4):
    """
    :param dataset_dir: directory containing train_feature.tfrecords
    :param weights_path: path to pre-trained weights to restore (optional)
    :param decode: whether to decode predictions and report sequence distance / accuracy
    :param num_threads: Number of threads to use in tf.train.shuffle_batch
    :return: history of training costs
    """
    # Load config
    cfg = load_config().cfg

    # decode the tf records to get the training data
    decoder = data_utils.TextFeatureIO().reader
    input_images, input_labels, input_image_names = decoder.read_features(
        ops.join(dataset_dir, 'train_feature.tfrecords'), cfg.TRAIN.BATCH_SIZE, num_threads)

    # initialise the net model
    shadownet = crnn_model.ShadowNet(phase='Train',
                                     hidden_nums=cfg.ARCH.HIDDEN_UNITS,
                                     layers_nums=cfg.ARCH.HIDDEN_LAYERS,
                                     num_classes=len(decoder.char_dict) + 1)

    with tf.variable_scope('shadow', reuse=False):
        net_out = shadownet.build_shadownet(inputdata=input_images)

    cost = tf.reduce_mean(tf.nn.ctc_loss(
        labels=input_labels, inputs=net_out,
        sequence_length=cfg.ARCH.SEQ_LENGTH * np.ones(cfg.TRAIN.BATCH_SIZE)))

    decoded, log_prob = tf.nn.ctc_beam_search_decoder(
        net_out, cfg.ARCH.SEQ_LENGTH * np.ones(cfg.TRAIN.BATCH_SIZE), merge_repeated=False)

    sequence_dist = tf.reduce_mean(tf.edit_distance(tf.cast(decoded[0], tf.int32), input_labels))

    global_step = tf.Variable(0, name='global_step', trainable=False)

    starter_learning_rate = cfg.TRAIN.LEARNING_RATE
    learning_rate = tf.train.exponential_decay(starter_learning_rate, global_step,
                                               cfg.TRAIN.LR_DECAY_STEPS, cfg.TRAIN.LR_DECAY_RATE,
                                               staircase=True)

    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        optimizer = tf.train.AdadeltaOptimizer(learning_rate=learning_rate).minimize(
            loss=cost, global_step=global_step)

    # Setup TF summary
    tboard_save_path = 'tboard/shadownet'
    if not ops.exists(tboard_save_path):
        os.makedirs(tboard_save_path)
    tf.summary.scalar(name='Cost', tensor=cost)
    tf.summary.scalar(name='Learning_Rate', tensor=learning_rate)
    if decode:
        tf.summary.scalar(name='Seq_Dist', tensor=sequence_dist)
    merge_summary_op = tf.summary.merge_all()

    # Set saver configuration
    saver = tf.train.Saver()
    model_save_dir = cfg.PATH.CRNN_MODEL_SAVE_DIR
    if not ops.exists(model_save_dir):
        os.makedirs(model_save_dir)
    train_start_time = time.strftime('%Y-%m-%d-%H-%M-%S', time.localtime(time.time()))
    model_name = 'shadownet_{:s}.ckpt'.format(str(train_start_time))
    model_save_path = ops.join(model_save_dir, model_name)

    # Set sess configuration
    sess_config = tf.ConfigProto()
    sess_config.gpu_options.per_process_gpu_memory_fraction = cfg.TRAIN.GPU_MEMORY_FRACTION
    sess_config.gpu_options.allow_growth = cfg.TRAIN.TF_ALLOW_GROWTH

    sess = tf.Session(config=sess_config)

    summary_writer = tf.summary.FileWriter(tboard_save_path)
    summary_writer.add_graph(sess.graph)

    # Set the training parameters
    train_epochs = cfg.TRAIN.EPOCHS

    with sess.as_default():
        if weights_path is None:
            logger.info('Training from scratch')
            init = tf.global_variables_initializer()
            sess.run(init)
        else:
            logger.info('Restore model from {:s}'.format(weights_path))
            saver.restore(sess=sess, save_path=weights_path)

        cost_history = [np.inf]
        for epoch in range(train_epochs):
            if decode:
                _, c, seq_distance, predictions, labels, summary = sess.run(
                    [optimizer, cost, sequence_dist, decoded, input_labels, merge_summary_op])

                labels = decoder.sparse_tensor_to_str(labels)
                predictions = decoder.sparse_tensor_to_str(predictions[0])
                accuracy = compute_accuracy(labels, predictions)

                if epoch % cfg.TRAIN.DISPLAY_STEP == 0:
                    logger.info('Epoch: {:d} cost= {:9f} seq distance= {:9f} train accuracy= {:9f}'.format(
                        epoch + 1, c, seq_distance, accuracy))
            else:
                _, c, summary = sess.run([optimizer, cost, merge_summary_op])
                if epoch % cfg.TRAIN.DISPLAY_STEP == 0:
                    logger.info('Epoch: {:d} cost= {:9f}'.format(epoch + 1, c))

            cost_history.append(c)
            summary_writer.add_summary(summary=summary, global_step=epoch)
            saver.save(sess=sess, save_path=model_save_path, global_step=epoch)

    return np.array(cost_history[1:])  # Don't return the first np.inf

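# A minimal usage sketch (the dataset directory is an example): train from
# scratch with decoding enabled and inspect the returned cost curve.
cost_curve = train_shadownet(dataset_dir='data/tfrecords', decode=True, num_threads=4)
print('Final training cost: {:f}'.format(cost_curve[-1]))
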
#!/usr/bin/env python2
import argparse
import sys

from fabric.api import get, run, settings

from utils import config_utils, setup_utils

# The nosetests command to run the integration tests
NOSETESTS_COMMAND = 'cd pulp-automation && nosetests -vs --with-xunit -x'

# Setup the CLI
description = 'Run integration tests using an environment deployed by deploy-environment.py'
parser = argparse.ArgumentParser(description=description)
parser.add_argument('--config', help='path to the configuration file produced by deploy-environment.py',
                    required=True)
parser.add_argument('--tests-destination', help='the location to place the nosetests.xml file on completion')
args = parser.parse_args()

config = config_utils.load_config(args.config)
flattened_config = config_utils.flatten_structure(config)
tester_config = filter(lambda conf: conf[setup_utils.ROLE] == setup_utils.PULP_TESTER_ROLE, flattened_config)[0]

with settings(host_string=tester_config[setup_utils.HOST_STRING],
              key_filename=tester_config[setup_utils.PRIVATE_KEY]):
    test_result = run(NOSETESTS_COMMAND, warn_only=True)
    get('pulp-automation/nosetests.xml', args.tests_destination or tester_config['tests_destination'])

sys.exit(test_result.return_code)

# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

import time

from args_handler import handle_args, parse_args, set_time_start
from report_generator import Report
from utils import file_utils, error_handling
from utils.config_utils import load_config

args = None

if __name__ == '__main__':
    set_time_start(time.time())
    args = parse_args()
    handle_args(args)
    load_config()

    # Path & args
    error_handling.args = args
    path = file_utils.get_path(args)

    # Make a report
    report = Report(path)

"eval_pred": y_pred_array, "eval_prob": y_prob_array[1:, :] } # return the validation loss and validation accuracy return f1_score(y_true_array, y_pred_array, average="macro"), f1_score(y_true_array, y_pred_array, average="micro"), eval_stat #return running_loss / len(loader) if __name__ == "__main__": # initialize a model params = load_config('config.yaml') #model = make_model(h=params["head_number"], d_model=params["d_model"], d_ff=params["d_ff"], dropout=params["dropout"], max_len=params["max_len"], record_dim=params["record_dim"], d_ff_hidden=params["d_ff_hidden"], N=params["encoder_num"], de_factor=params["de_factor"]) model = LinearRegression(params["feature_dim"], params["output_dim"]) # load in the pre-trained model PATH_pretrained = "./models/session_2019-09-08[17_52_06]/model_24.pth" model.load_state_dict(torch.load(PATH_pretrained)) # move the model into the corresponding device device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") model.to(device) # define the criterion # define the criterion # How to get weights: # e.g., weight of NORM = (# all training samples) / (# normal samples) training_loss_weights = [2.488, 43.346, 3.295, 3.683]
                print('Saving weights, BLEU {} (prev_best) < {} (cur)'.format(best_bleu, dev_bleu))
                saver.save(sess, best_path)
                best_bleu = dev_bleu
                FLAGS.best_bleu = dev_bleu
                config_utils.save_config(FLAGS, path_prefix + "config.json")
            sess.run(train_graph.restore_backup_vars_op)

            duration = time.time() - start_time
            print('Duration %.3f sec' % (duration))
            sys.stdout.flush()
            log_file.write('Duration %.3f sec\n' % (duration))
            log_file.flush()

        log_file.close()


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--config_path', type=str, help='Configuration file.')
    parser.add_argument('--data_split', type=int, default=1)
    args = parser.parse_args()

    if args.config_path is not None:
        print('Loading the configuration from ' + args.config_path)
        FLAGS = config_utils.load_config(args)

    tf.app.run(main=main)

import datetime
import cv2
import numpy as np
import uuid
import json
import functools
import logging
import collections

from utils.config_utils import load_config

logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

# Read configuration for width/height
cfg = load_config().cfg


@functools.lru_cache(maxsize=1)
def get_host_info():
    return {}


@functools.lru_cache(maxsize=100)
def get_crnn(checkpoint_path):
    import tensorflow as tf
    from models.crnn import crnn_model
    from utils import data_utils

    # Read configuration for width/height
    w, h = cfg.ARCH.INPUT_SIZE

        if self.set_total is not None:
            self.set_total(len(self.chapters))
        for chapter in self.chapters[start:end]:
            self.downloader.push(chapter.download)
        for future in as_completed(self.downloader.tasks):
            if self.update is not None:
                self.update(1)
        self.downloader.wait()
        return True

    def save(self, path, file_name):
        safe_mkdir(path)
        with open(os.path.join(path, file_name), 'w', encoding='utf8') as novel:
            for chapter in self.chapters:
                ready_to_write = '# ' + chapter.title + '\n\n' + chapter.body
                novel.write(ready_to_write + '\n')


if __name__ == '__main__':
    # print(os.path.dirname(os.getcwd()))
    config = load_config(os.path.dirname(os.getcwd()), "88dush.json")
    # print(config)
    book = Book("https://www.88dushu.com/xiaoshuo/1/1552/", config)
    book.load_catalog()
    print(book.information)
    book.download(0, 20)
    print(book.chapters)
    book.save(os.getcwd(), "xxx.txt")