def segment(self, pic, pipe, text, lower=False, use_jieba=False):
    text = util.as_text(text)
    # sentences are the sentences split on the configured end delimiters.
    sentences = self.ss.segment(text)
    words_no_filter = self.ws.segment_sentences(
        pic, pipe, sentences=sentences, lower=lower,
        use_stop_words=False, use_speech_tags_filter=False, use_jieba=use_jieba)
    words_no_stop_words = self.ws.segment_sentences(
        pic, pipe, sentences=sentences, lower=lower,
        use_stop_words=True, use_speech_tags_filter=False, use_jieba=use_jieba)
    words_all_filters = self.ws.segment_sentences(
        pic, pipe, sentences=sentences, lower=lower,
        use_stop_words=True, use_speech_tags_filter=True, use_jieba=use_jieba)
    return util.AttrDict(sentences=sentences,
                         words_no_filter=words_no_filter,
                         words_no_stop_words=words_no_stop_words,
                         words_all_filters=words_all_filters)
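Every snippet in this collection pulls AttrDict from its project's own util module, which is not shown here. A minimal sketch, assuming the common dict-with-attribute-access pattern that these usages appear to rely on (the real util modules may differ):

class AttrDict(dict):
    """A dict whose items are also readable and writable as attributes."""
    def __init__(self, *args, **kwargs):
        super(AttrDict, self).__init__(*args, **kwargs)
        # Point the instance __dict__ at the dict itself, so d.key and d['key']
        # stay in sync for both reads and writes.
        self.__dict__ = self

With such a class, AttrDict(batch_size=32) makes config.batch_size and config['batch_size'] interchangeable, which is what the return values and config objects in the snippets below depend on.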
def sort_sentences(sentences, words, sim_func=default_sentence_similarity):
    """Rank sentences by importance."""
    sorted_sentences = {}
    graph = similarity_matrix(words, sim_func)
    nx_graph = nx.from_numpy_matrix(graph)
    scores = nx.pagerank(nx_graph)
    average_score = sum(scores.values()) / len(scores)
    for index, score in scores.items():
        # Combine the clue-word score with the PageRank score.
        feature_score = util.clue_score(words[index]) * average_score + score
        # Sentences with fewer than 8 words are excluded from the ranking.
        if len(words[index]) < 8:
            feature_score = 0
        item = util.AttrDict(sentence=sentences[index],
                             weight=feature_score,
                             words=util.clean_stop_words(words[index]))
        sorted_sentences[index] = item
    return sorted_sentences
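The similarity_matrix helper used above is not shown; a rough sketch of what it presumably computes, assuming a standard TextRank-style symmetric matrix of pairwise sentence similarities (the real implementation may differ):

import numpy as np

def similarity_matrix_sketch(words, sim_func):
    # words: one list of tokens per sentence; sim_func: similarity of two sentences.
    n = len(words)
    graph = np.zeros((n, n))
    for i in range(n):
        for j in range(i + 1, n):
            s = sim_func(words[i], words[j])
            graph[i][j] = s
            graph[j][i] = s
    return graph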
def main(argv):
    if len(argv) > 1:
        raise app.UsageError('Too many command-line arguments.')
    tf.enable_v2_behavior()

    config_file = most_recent_file(FLAGS.experiment_path, r'config.yaml')
    assert config_file
    with open(config_file, 'r') as f:
        config = util.AttrDict(**yaml.load(f.read()))
    logging.info('Config:\n%s', pprint.pformat(config))

    env = gym.make(config.env)
    cls = globals()[config.policy]
    policy = cls(config)
    # Initialize policy
    policy.argmax(np.expand_dims(env.reset(), 0))

    # Load checkpoint. Assuming policy is a keras.Model instance.
    logging.info('policy variables: %s',
                 [v.name for v in policy.trainable_variables])
    ckpt = tf.train.Checkpoint(policy=policy)
    ckpt_file = most_recent_file(FLAGS.experiment_path, r'model.ckpt-[0-9]+')
    if ckpt_file:
        ckpt_file = re.findall('^(.*/model.ckpt-[0-9]+)', ckpt_file)[0]
        logging.info('Checkpoint file: %s', ckpt_file)
        ckpt.restore(ckpt_file).assert_consumed()
    else:
        raise RuntimeError('No checkpoint found')

    summary_writer = tf.summary.create_file_writer(FLAGS.experiment_path,
                                                   flush_millis=10000)
    logging.info('Starting Evaluation')
    it = (range(FLAGS.num_episodes)
          if FLAGS.num_episodes >= 0 else itertools.count())
    for ep in it:
        memory = replay.Memory()
        sample_episode(env, policy, memory, max_episode_length=200)
        logging.info(ep)
        with summary_writer.as_default(), summary.always_record_summaries():
            summary.scalar('return', memory.observed_rewards().sum(), step=ep)
            summary.scalar('length', memory.observed_rewards().shape[-1], step=ep)
    logging.info('DONE')
def segment(self, text, lower=False):
    '''Returns a util.AttrDict containing:
    sentences:            list of sentences, split on the delimiters
    words_no_filter:      nested list of segmented words
    words_no_stop_words:  same, with stop words removed
    words_all_filters:    same, with stop words removed and with words whose
                          POS tag is not in allow_speech_tags removed
    '''
    # text = util.as_text(text)
    sentences = self.ss.segment(text)  # a generator (filter)
    sentences_list = list(sentences)
    words_res = self.ws.segment_sentences(sentences_list)
    # The three word lists below have identical structure; they differ only in
    # stop-word removal and POS filtering.
    # TODO: returning lists is not strictly necessary here; prefer returning
    # generators when the caller does not need lists.
    return util.AttrDict(
        sentences=sentences_list,
        words_no_filter=[list(i) for i in words_res[0]],
        words_no_stop_words=[list(i) for i in words_res[1]],
        words_all_filters=[list(i) for i in words_res[2]]
    )
def segment(self, text, lower=False):
    sentences = self.ss.segment(text)
    words_no_filter = self.ws.segment_sentences(sentences=sentences,
                                                lower=lower,
                                                use_stop_words=False,
                                                use_speech_tags_filter=False)
    words_no_stop_words = self.ws.segment_sentences(sentences=sentences,
                                                    lower=lower,
                                                    use_stop_words=True,
                                                    use_speech_tags_filter=False)
    words_all_filters = self.ws.segment_sentences(sentences=sentences,
                                                  lower=lower,
                                                  use_stop_words=True,
                                                  use_speech_tags_filter=True)
    return util.AttrDict(
        sentences=sentences,
        words_no_filter=words_no_filter,
        words_no_stop_words=words_no_stop_words,
        words_all_filters=words_all_filters
    )
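A hypothetical usage note (the segmenter instance below is an assumption, not taken from the snippet): because segment() returns an AttrDict, each field can be read either as a key or as an attribute.

# Hypothetical usage, assuming `segmenter` is an instance of the class above
# and `text` is a raw document string:
result = segmenter.segment(text, lower=True)
print(len(result.sentences))              # attribute access
print(len(result['words_all_filters']))   # equivalent key access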
def sentences_classify(self, sorted_scores):
    # Category buckets: '方法' (method), '目的' (purpose), '结果' (result), '其他' (other).
    category_sentence = {'方法': [], '目的': [], '结果': [], '其他': []}
    for i, score in enumerate(self.sentences_scores()):
        sentence = ','.join(self.content_sentences[i])
        words = []
        for short_words in self.content_words[i]:
            words += short_words
        # Pick the category with the highest score for this sentence.
        category = max(score, key=lambda x: score[x])
        max_score = score[category]
        item = util.AttrDict(sentence=sentence,
                             sorted_score=sorted_scores[i]['weight'],
                             words=words,
                             category_score=max_score,
                             category=category)
        if max_score >= 1:
            category_sentence[category].append(item)
        else:
            category_sentence['其他'].append(item)
    # Sort each category by the sentence ranking weight, highest first.
    for category in category_sentence:
        category_sentence[category].sort(
            key=lambda item: item['sorted_score'], reverse=True)
    return category_sentence
import os
import json

from bottle import install, get, request, response, run, HTTPError
import bottle_pgpool
import psycopg2.pool

import util

### API endpoints

api_v1 = '/api/v1'

ep = util.AttrDict(
    index           = os.path.join(api_v1, 'index'),
    venues          = os.path.join(api_v1, 'venues'),
    venue           = os.path.join(api_v1, 'venues/<id:int>'),
    venue_nearby    = os.path.join(api_v1, 'venues/<id:int>/nearby'),
    categories      = os.path.join(api_v1, 'categories'),
    category        = os.path.join(api_v1, 'categories/<id:int>'),
    category_venues = os.path.join(api_v1, 'categories/<id:int>/venues'),
    zips            = os.path.join(api_v1, 'zips'),
    zip             = os.path.join(api_v1, 'zips/<zip>'),
    zip_venues      = os.path.join(api_v1, 'zips/<zip>/venues'),
)

### setup

config = util.read_config("api")
log = util.config_logging(config).getLogger("server")

# connection pool
pool = psycopg2.pool.ThreadedConnectionPool(
    minconn=1,
parser.add_argument("--gumbel_sample", action="store_true", help="turn on random selection of path during training") parser.add_argument("--max_chunk_vocab_size", default=10000, type=int, help="size of chunk vocab") parser.add_argument("--test_samples", default=1, type=int, help="number of samples to take") parser.add_argument("--concat_context_vector", action="store_true", help="concat context vector instead of initializing") args = util.AttrDict(vars(parser.parse_args())) print "Args:", args if args.ptb: DATA_LOC = 'data/ptb' DATA_VIEW = 'word' elif args.zh: DATA_LOC = 'data/zh' DATA_VIEW = 'char' else: DATA_LOC = 'data/en/bpe' DATA_VIEW = 'word' args.train_data = DATA_LOC + '/train' args.valid_data = DATA_LOC + '/valid' args.test_data = DATA_LOC + '/test'
import util

config = util.AttrDict(**{
    # Default values for command line arguments
    'data_dir'       : '/tmp/zf/data',
    'pgn_dir'        : '/tmp/zf/pgns',
    'model_dir'      : '/tmp/zf/model',
    'optimizer'      : 'Adam',
    'learning_rate'  : 1e-3,
    'l2_scale'       : 1e-3,
    'num_epochs'     : 1,
    'batch_size'     : 32,
    'filters'        : 32,
    'modules'        : 0,
    # Not parsed from command line
    'input_height'   : 8,
    'input_width'    : 8,
    'input_channels' : 26,
    'classes_shape'  : (8, 8, 8, 8),
    # Defined below
    'input_shape'    : None,
    'input_total'    : None,
    'n_classes'      : None
})

config.input_shape = (config.input_height, config.input_width,
                      config.input_channels)
import util
import tensorflow as tf
import data_util
import dual_learning
from pprint import pprint

# parameters
params = util.AttrDict()

params.seq2seq = util.AttrDict(
    max_len_A = 21,
    max_len_B = 21,
    ckpt_path_AB = 'en_fr',
    ckpt_path_BA = 'fr_en',
    emb_dim = 1024,
    num_layers = 1,
    batch_size = 32,
    steps = 100000,
    beam_size = 2,
    alpha = 0.5,
    ratio_dual = 0.5
)

params.lm_a = util.AttrDict(
    model_name = 'lm_a',
    load_model = '../cv2/A/epoch022_6.8524.model',
    train_dir = '../cv2/A',
    rnn_size = 650,
    highway_layers = 2,
    char_embed_size = 30,
    kernels = '[1,2,3,4,5,6,7]',
    kernel_features = '[50,100,150,200,200,200,200]',
def model_fn(features, labels, mode, params):
    # Training flag
    training = (mode == tf.estimator.ModeKeys.TRAIN)

    # Extract and concatenate features for input
    inputs = features['image']

    # Get unscaled log probabilities
    with tf.variable_scope('inference', reuse=params.get('reuse', False),
                           custom_getter=collection_getter):
        policy, value = inference(inputs,
                                  filters=params['filters'],
                                  modules=params['modules'],
                                  n_classes=params['n_classes'],
                                  training=training)

    # Add summaries to weights
    for var in tf.trainable_variables():
        tf.summary.histogram(var.name.split(':')[0] + '_summary', var)

    # Specification
    spec = util.AttrDict(mode=mode, features=features,
                         predictions=(policy, value))

    # Return early inference specification
    if mode == tf.estimator.ModeKeys.PREDICT:
        return spec

    with tf.variable_scope('losses'):
        # Value loss
        value_loss = tf.losses.mean_squared_error(labels=labels['value'],
                                                  predictions=value,
                                                  weights=1.0 / 4.0)
        policy_loss = tf.losses.softmax_cross_entropy(
            onehot_labels=labels['policy'], logits=policy, weights=1.0)

        # Get l2 regularization loss
        l2_loss = tf.contrib.layers.apply_regularization(
            tf.contrib.layers.l2_regularizer(params['l2_scale']))
        tf.losses.add_loss(l2_loss)

        # Total loss
        loss = tf.losses.get_total_loss(add_regularization_losses=False)

        # Add total loss to loss collection
        tf.add_to_collection(tf.GraphKeys.LOSSES, loss)

    # Add summaries for losses
    for loss_tensor in tf.get_collection(tf.GraphKeys.LOSSES):
        tf.summary.scalar(loss_tensor.name.split(':')[0] + '_summary',
                          loss_tensor)

    spec.labels = labels
    spec.loss = loss
    spec.eval_metric_ops = util.AttrDict()

    # Return early evaluation specification
    if mode == tf.estimator.ModeKeys.EVAL:
        return spec

    # Get global step for training op
    global_step = tf.train.get_global_step()

    with tf.variable_scope('train'):
        # Get optimizer function
        optimizer_fn = {
            'Adam': tf.train.AdamOptimizer,
            'RMSProp': tf.train.RMSPropOptimizer,
            'GradientDescent': tf.train.GradientDescentOptimizer
        }[params.get('optimizer', 'Adam')]
        optimizer = optimizer_fn(params['learning_rate'])

        # Compute gradients and add summaries
        grads_and_tvars = optimizer.compute_gradients(spec.loss)

        # Create train operation
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            train_op = optimizer.apply_gradients(grads_and_tvars,
                                                 global_step=global_step)

        # Add summaries for gradients
        with tf.variable_scope('gradients'):
            tf.contrib.training.add_gradients_summaries(grads_and_tvars)

    spec.train_op = train_op

    # Return full train specification
    return spec