def Eval(sess):
  """Builds and evaluates a network."""
  logging.set_verbosity(logging.INFO)
  #bpe = BPE(codecs.open("code-file", encoding='utf-8'), "@@")
  wordMapPath = "ner_models/ner_word2id"
  nerMapPath = "ner_models/ner_tag2id"
  pMapPath = "ner_models/ner_prefix2id"
  sMapPath = "ner_models/ner_suffix2id"
  prefix2id = utils.read_pickle_file(pMapPath)
  suffix2id = utils.read_pickle_file(sMapPath)
  word2id = utils.read_pickle_file(wordMapPath)
  tag2id = utils.read_pickle_file(nerMapPath)
  loading_time = time.time()
  logging.info("loading data and precomputing features...")
  dataset = Dataset(None, None, FLAGS.test_corpus,
                    format_list=['FORM', 'a', 'b', 'NER'])
  dataset.load_dataset(word2id=word2id, tag2id=tag2id, prefix2id=prefix2id,
                       suffix2id=suffix2id, fgen=False)
  logging.info('test sentences: %d', dataset.get_sent_num('test'))
  logging.info("loading time: %.2f", time.time() - loading_time)
def load_model(dragnn_spec, resource_path, checkpoint_filename,
               enable_tracing=False, tf_master=''):
  logging.set_verbosity(logging.WARN)
  # check
  check.IsTrue(dragnn_spec)
  check.IsTrue(resource_path)
  check.IsTrue(checkpoint_filename)
  # Load master spec
  master_spec = load_master_spec(dragnn_spec, resource_path)
  # Build graph
  graph, builder, annotator = build_inference_graph(
      master_spec, enable_tracing=enable_tracing)
  with graph.as_default():
    # Restore model
    sess = tf.Session(target=tf_master, graph=graph)
    # Make sure to re-initialize all underlying state.
    sess.run(tf.global_variables_initializer())
    builder.saver.restore(sess, checkpoint_filename)
  m = {}
  m['session'] = sess
  m['graph'] = graph
  m['builder'] = builder
  m['annotator'] = annotator
  return m
def main(unused_argv):
  logging.set_verbosity(logging.INFO)
  if not gfile.IsDirectory(OutputPath('')):
    gfile.MakeDirs(OutputPath(''))
  loading_time = time.time()
  train_data_path = '/cs/natlang-user/vivian/wsj-conll/train.conllu'
  dev_data_path = '/cs/natlang-user/vivian/wsj-conll/dev.conllu'
  test_data_path = '/cs/natlang-user/vivian/wsj-conll/test.conllu'
  logging.info("loading data and precomputing features...")
  dataset = Dataset(train_data_path, dev_data_path, test_data_path)
  dataset.load_dataset()
  logging.info('training sentences: %d', dataset.get_sent_num('train'))
  logging.info("loading time: %.2f", time.time() - loading_time)
  logging.info(dataset.number_of_classes)
  # num of features for each feature group:
  # capitalization, words, other, prefix_2, suffix_2, tags_history
  feature_sizes = [8, 8, 2, 8, 8, 4]
  domain_sizes = [3, dataset.vocabulary_size, 3, dataset.prefix_size,
                  dataset.suffix_size, dataset.number_of_classes + 1]
  num_actions = dataset.number_of_classes
  embedding_dims = [8, 100, 8, 50, 50, 50]
  with tf.Session(FLAGS.tf_master) as sess:
    Train(sess, num_actions, feature_sizes, domain_sizes, embedding_dims,
          dataset)
def pretty_print():
  _write_input(_read_output().strip())
  logging.set_verbosity(logging.INFO)
  with tf.Session() as sess:
    src = gen_parser_ops.document_source(batch_size=32,
                                         corpus_name='input-from-file-conll',
                                         task_context=task_context_path)
    sentence = sentence_pb2.Sentence()
    while True:
      documents, finished = sess.run(src)
      logging.info('Read %d documents', len(documents))
      # for d in documents:
      #   sentence.ParseFromString(d)
      #   as_asciitree(sentence)
      for d in documents:
        sentence.ParseFromString(d)
        tr = asciitree.LeftAligned()
        d = to_dict(sentence)
        print('Input: %s' % sentence.text)
        print('Parse:')
        tr_str = tr(d)
        pat = re.compile(r'\s*@\d+$')
        for tr_ln in tr_str.splitlines():
          print(pat.sub('', tr_ln))
      if finished:
        break
def main(unused_argv):
  logging.set_verbosity(logging.INFO)
  if not gfile.IsDirectory(OutputPath('')):
    gfile.MakeDirs(OutputPath(''))
  loading_time = time.time()
  train_data_path = '/cs/natlang-data/CoNLL/CoNLL-2003/eng.testa'
  dev_data_path = '/cs/natlang-data/CoNLL/CoNLL-2003/eng.testa'
  test_data_path = '/cs/natlang-data/CoNLL/CoNLL-2003/eng.testb'
  logging.info("loading data and precomputing features...")
  dataset = Dataset(train_data_path, dev_data_path, test_data_path)
  dataset.load_dataset()
  logging.info('training sentences: %d', dataset.get_sent_num('train'))
  logging.info("loading time: %.2f", time.time() - loading_time)
  # num of features for each feature group:
  # capitalization, words, prefix_2, suffix_2, pos_tags
  feature_sizes = [8] * 5
  domain_sizes = [3, dataset.vocabulary_size, dataset.prefix_size,
                  dataset.suffix_size, dataset.pos_classes]
  num_actions = dataset.number_of_classes
  embedding_dims = [8, 64, 16, 16, 16]
  #logging.info(train_data.vocabulary_size)
  #logging.info(train_data.prefix_size)
  #logging.info(train_data.suffix_size)
  with tf.Session(FLAGS.tf_master) as sess:
    Train(sess, num_actions, feature_sizes, domain_sizes, embedding_dims,
          dataset)
def main():
  logging.set_verbosity(logging.INFO)
  parser = create_parser()
  args = parser.parse_args()
  if not hasattr(args, 'func'):
    parser.error('too few arguments')
  args.func(args)
def load_model(self, base_dir, master_spec_name, checkpoint_name):
  # Read the master spec
  master_spec = spec_pb2.MasterSpec()
  with open(os.path.join(base_dir, master_spec_name), "r") as f:
    text_format.Merge(f.read(), master_spec)
  spec_builder.complete_master_spec(master_spec, None, base_dir)
  logging.set_verbosity(logging.WARN)  # Turn off TensorFlow spam.

  # Initialize a graph
  graph = tf.Graph()
  with graph.as_default():
    hyperparam_config = spec_pb2.GridPoint()
    builder = graph_builder.MasterBuilder(master_spec, hyperparam_config)
    # This is the component that will annotate test sentences.
    annotator = builder.add_annotation(enable_tracing=True)
    # "Savers" can save and load models; here, we're only going to load.
    builder.add_saver()

  sess = tf.Session(graph=graph)
  with graph.as_default():
    # sess.run(tf.global_variables_initializer())
    # sess.run('save/restore_all',
    #          {'save/Const:0': os.path.join(base_dir, checkpoint_name)})
    builder.saver.restore(sess, os.path.join(base_dir, checkpoint_name))

  def annotate_sentence(sentence):
    with graph.as_default():
      return sess.run(
          [annotator['annotations'], annotator['traces']],
          feed_dict={annotator['input_batch']: [sentence]})

  return annotate_sentence
def main(unused_argv):
  logging.set_verbosity(logging.INFO)
  if not gfile.IsDirectory(OutputPath('')):
    gfile.MakeDirs(OutputPath(''))
  loading_time = time.time()
  logging.info("loading data and precomputing features...")
  dataset = Dataset(FLAGS.training_corpus, FLAGS.tuning_corpus,
                    data_output=FLAGS.output_path)
  dataset.load_dataset()
  logging.info('training sentences: %d', dataset.get_sent_num('train'))
  logging.info("loading time: %.2f", time.time() - loading_time)
  logging.info(dataset.number_of_classes)
  num_actions = dataset.number_of_classes
  if FLAGS.word_only:
    feature_sizes = [8]
    domain_sizes = [dataset.vocabulary_size]
    embedding_dims = [100]
  else:
    # num of features for each feature group:
    # capitalization, words, other, prefix_2, suffix_2, tags_history
    feature_sizes = [8, 8, 2, 8, 8, 4]
    domain_sizes = [3, dataset.vocabulary_size, 3, dataset.prefix_size,
                    dataset.suffix_size, dataset.number_of_classes + 1]
    embedding_dims = [8, 100, 8, 50, 50, 50]
  with tf.Session(FLAGS.tf_master) as sess:
    Train(sess, num_actions, feature_sizes, domain_sizes, embedding_dims,
          dataset)
def main(unused_argv):
  logging.set_verbosity(logging.INFO)
  with tf.Session() as sess:
    src = gen_parser_ops.document_source(batch_size=32,
                                         corpus_name=FLAGS.corpus_name,
                                         task_context=FLAGS.task_context)
    sentence = sentence_pb2.Sentence()
    while True:
      documents, finished = sess.run(src)
      logging.info('Read %d documents', len(documents))
      for d in documents:
        sentence.ParseFromString(d)
        #print '...Sentence string before serialization: ', d
        tr = asciitree.LeftAligned()
        d = to_dict(sentence)
        print 'Input: %s' % sentence.text
        serializedStr = sentence.SerializeToString()
        #print '...Sentence string protobuf: ', serializedStr
        file = open("/Users/yihed/Documents/workspace/Other/src/thmp/data/serializedSentence.txt", "wb")
        #file = open("serializedSentence.txt", "wb")
        file.write(serializedStr)
        file.close()
        print 'Parse:'
        print tr(d)
      if finished:
        break
def _logging_show_info():
  try:
    verbosity = logging.get_verbosity()
    logging.set_verbosity(logging.INFO)
    yield
  finally:
    logging.set_verbosity(verbosity)
def train(data_folder): g = tf.Graph() with g.as_default(): # Load dataset. frames, audio, ground_truth, _ = data_provider.get_split( data_folder, True, 'train', FLAGS.batch_size, seq_length=FLAGS.seq_length) # Define model graph. with slim.arg_scope([slim.batch_norm, slim.layers.dropout], is_training=True): with slim.arg_scope( slim.nets.resnet_utils.resnet_arg_scope(is_training=True)): prediction = models.get_model(FLAGS.model)( frames, audio, hidden_units=FLAGS.hidden_units) for i, name in enumerate(['arousal', 'valence']): pred_single = tf.reshape(prediction[:, :, i], (-1, )) gt_single = tf.reshape(ground_truth[:, :, i], (-1, )) loss = losses.concordance_cc(pred_single, gt_single) tf.summary.scalar('losses/{} loss'.format(name), loss) mse = tf.reduce_mean(tf.square(pred_single - gt_single)) tf.summary.scalar('losses/mse {} loss'.format(name), mse) tf.losses.add_loss(loss / 2.) total_loss = tf.losses.get_total_loss() tf.summary.scalar('losses/total loss', total_loss) optimizer = tf.train.AdamOptimizer(FLAGS.initial_learning_rate) init_fn = None with tf.Session(graph=g) as sess: if FLAGS.pretrained_model_checkpoint_path: def name_in_checkpoint(var): return var.op.name[12:] variables_to_restore = slim.get_model_variables() init_fn = slim.assign_from_checkpoint_fn( FLAGS.pretrained_model_checkpoint_path, variables_to_restore) train_op = slim.learning.create_train_op(total_loss, optimizer, summarize_gradients=True) logging.set_verbosity(1) slim.learning.train(train_op, FLAGS.train_dir, init_fn=init_fn, save_summaries_secs=60, save_interval_secs=300)
def train(data_folder): """Trains the audio model. Args: data_folder: The folder that contains the training data. """ g = tf.Graph() with g.as_default(): # Load dataset. provider = data_provider.get_provider(FLAGS.task) audio, ground_truth, _ = provider(data_folder).get_split( FLAGS.portion, FLAGS.batch_size) # Define model graph. with slim.arg_scope([slim.batch_norm, slim.layers.dropout], is_training=True): prediction = models.get_model(FLAGS.model)( audio, num_classes=provider.num_classes, num_lstm_modules=FLAGS.num_lstm_modules) loss = tf.nn.weighted_cross_entropy_with_logits(prediction, ground_truth, pos_weight=1) loss = slim.losses.compute_weighted_loss(loss) total_loss = slim.losses.get_total_loss() accuracy = tf.reduce_mean( tf.to_float( tf.equal(tf.argmax(ground_truth, 1), tf.argmax(prediction, 1)))) chance_accuracy = tf.reduce_mean( tf.to_float(tf.equal(tf.argmax(ground_truth, 1), 0))) tf.scalar_summary('losses/total loss', total_loss) tf.scalar_summary('accuracy', accuracy) tf.scalar_summary('chance accuracy', chance_accuracy) tf.histogram_summary('labels', tf.argmax(ground_truth, 1)) tf.scalar_summary('losses/Cross Entropy Loss', loss) optimizer = tf.train.AdamOptimizer(FLAGS.initial_learning_rate) with tf.Session(graph=g) as sess: if FLAGS.pretrained_model_checkpoint_path: variables_to_restore = slim.get_variables_to_restore() saver = tf.train.Saver(variables_to_restore) saver.restore(sess, FLAGS.pretrained_model_checkpoint_path) train_op = slim.learning.create_train_op(total_loss, optimizer, summarize_gradients=True) logging.set_verbosity(1) slim.learning.train(train_op, FLAGS.train_dir, save_summaries_secs=60, save_interval_secs=600)
def main(unused_argv=None): logging.set_verbosity(logging.INFO) tf_version = versions.__version__ print('TensorFlow version %s detected' % tf_version) print('Welcome to the Cloud TPU Profiler v%s' % profiler_version.__version__) if LooseVersion(tf_version) < LooseVersion('1.14.0'): sys.exit('You must install tensorflow >= 1.14.0 to use this plugin.') if not FLAGS.service_addr and not FLAGS.tpu: sys.exit('You must specify either --service_addr or --tpu.') tpu_cluster_resolver = None if FLAGS.service_addr: if FLAGS.tpu: logging.warn('Both --service_addr and --tpu are set. Ignoring ' '--tpu and using --service_addr.') service_addr = FLAGS.service_addr else: try: tpu_cluster_resolver = (resolver.TPUClusterResolver( [FLAGS.tpu], zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)) service_addr = tpu_cluster_resolver.get_master() except (ValueError, TypeError): sys.exit( 'Failed to find TPU %s in zone %s project %s. You may use ' '--tpu_zone and --gcp_project to specify the zone and project of' ' your TPU.' % (FLAGS.tpu, FLAGS.tpu_zone, FLAGS.gcp_project)) service_addr = service_addr.replace('grpc://', '').replace(':8470', ':8466') workers_list = '' if FLAGS.workers_list is not None: workers_list = FLAGS.workers_list elif tpu_cluster_resolver is not None: workers_list = get_workers_list(tpu_cluster_resolver) # If profiling duration was not set by user or set to a non-positive value, # we set it to a default value of 1000ms. duration_ms = FLAGS.duration_ms if FLAGS.duration_ms > 0 else 1000 if FLAGS.monitoring_level > 0: print('Since monitoring level is provided, profile', service_addr, ' for ', FLAGS.duration_ms, ' ms and show metrics for ', FLAGS.num_queries, ' time(s).') monitoring_helper(service_addr, duration_ms, FLAGS.monitoring_level, FLAGS.display_timestamp, FLAGS.num_queries) else: if not FLAGS.logdir: sys.exit('You must specify either --logdir or --monitoring_level.') try: profiler_client.start_tracing(service_addr, os.path.expanduser(FLAGS.logdir), duration_ms, workers_list, FLAGS.include_dataset_ops, FLAGS.num_tracing_attempts) except errors.UnavailableError: sys.exit(0)
def main(unused_argv) : if len(sys.argv) == 1 : flags._global_parser.print_help() sys.exit(0) logging.set_verbosity(logging.INFO) check.IsTrue(FLAGS.training_corpus_path) check.IsTrue(FLAGS.tune_corpus_path) check.IsTrue(FLAGS.resource_path) check.IsTrue(FLAGS.checkpoint_filename) if not gfile.IsDirectory(FLAGS.resource_path): gfile.MakeDirs(FLAGS.resource_path) training_corpus_path = gfile.Glob(FLAGS.training_corpus_path)[0] tune_corpus_path = gfile.Glob(FLAGS.tune_corpus_path)[0] # SummaryWriter for TensorBoard tf.logging.info('TensorBoard directory: "%s"', FLAGS.tensorboard_dir) tf.logging.info('Deleting prior data if exists...') stats_file = '%s.stats' % FLAGS.checkpoint_filename try : stats = gfile.GFile(stats_file, 'r').readlines()[0].split(',') stats = [int(x) for x in stats] except errors.OpError : stats = [-1, 0, 0] tf.logging.info('Read ckpt stats: %s', str(stats)) do_restore = True if stats[0] < FLAGS.job_id : do_restore = False tf.logging.info('Deleting last job: %d', stats[0]) try : gfile.DeleteRecursively(FLAGS.tensorboard_dir) gfile.Remove(FLAGS.checkpoint_filename) except errors.OpError as err : tf.logging.error('Unable to delete prior files: %s', err) stats = [FLAGS.job_id, 0, 0] tf.logging.info('Creating the directory again...') gfile.MakeDirs(FLAGS.tensorboard_dir) tf.logging.info('Created! Instatiating SummaryWriter...') summary_writer = trainer_lib.get_summary_writer(FLAGS.tensorboard_dir) tf.logging.info('Creating TensorFlow checkpoint dir...') gfile.MakeDirs(os.path.dirname(FLAGS.checkpoint_filename)) # Constructs lexical resources for SyntaxNet in the given resource path, from # the training data. if FLAGS.compute_lexicon : logging.info('Computing lexicon...') lexicon.build_lexicon(FLAGS.resource_path, training_corpus_path, morph_to_pos=True) # Load master spec master_spec = model.load_master_spec(FLAGS.dragnn_spec, FLAGS.resource_path) # Build graph graph, builder, trainers, annotator = model.build_train_graph(master_spec) # Train train(graph, builder, trainers, annotator, summary_writer, do_restore, stats)
def train(data_folder): g = tf.Graph() with g.as_default(): # Load dataset. frames, audio, ground_truth, _ = data_provider.get_split( data_folder, True, 'train', FLAGS.batch_size, seq_length=FLAGS.seq_length) #https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/framework/python/ops/arg_scope.py # Define model graph. with slim.arg_scope([slim.batch_norm, slim.layers.dropout], is_training=True): with slim.arg_scope( slim.nets.resnet_utils.resnet_arg_scope(is_training=True)): prediction = models.recurrent_model( models.audio_model(audio_frames=audio), hidden_units=256) for i, name in enumerate(['arousal', 'valence']): pred_single = tf.reshape(prediction[:, :, i], (-1, )) gt_single = tf.reshape(ground_truth[:, :, i], (-1, )) loss = losses.concordance_cc(pred_single, gt_single) tf.summary.scalar('losses/{} loss'.format(name), loss) mse = tf.reduce_mean(tf.square(pred_single - gt_single)) tf.summary.scalar('losses/mse {} loss'.format(name), mse) slim.losses.add_loss(loss / 2.) total_loss = slim.losses.get_total_loss() tf.summary.scalar('losses/total loss', total_loss) optimizer = tf.train.AdamOptimizer(FLAGS.initial_learning_rate) init_fn = None with tf.Session(graph=g) as sess: if FLAGS.pretrained_model_checkpoint_path: # Need to specify which variables to restore (use scope of models) variables_to_restore = slim.get_variables() init_fn = slim.assign_from_checkpoint_fn( FLAGS.pretrained_model_checkpoint_path, variables_to_restore) train_op = slim.learning.create_train_op(total_loss, optimizer, summarize_gradients=True) logging.set_verbosity(1) slim.learning.train(train_op, FLAGS.train_dir, init_fn=init_fn, save_summaries_secs=60, save_interval_secs=300)
def Eval(sess):
  """Builds and evaluates a network."""
  logging.set_verbosity(logging.INFO)
  #bpe = BPE(codecs.open("code-file", encoding='utf-8'), "@@")
  wordMapPath = "ner_word_map"
  nerMapPath = "ner_ner_map"
  pMapPath = "ner_prefix_map"
  sMapPath = "ner_suffix_map"
  posMapPath = "ner_pos_map"
  pMap = utils.read_pickle_file(pMapPath)
  sMap = utils.read_pickle_file(sMapPath)
  wordMap = utils.read_pickle_file(wordMapPath)
  posMap = utils.read_pickle_file(posMapPath)
  nerMap = utils.read_pickle_file(nerMapPath)
  loading_time = time.time()
  train_data_path = '/cs/natlang-data/CoNLL/CoNLL-2003/eng.testa'
  dev_data_path = '/cs/natlang-data/CoNLL/CoNLL-2003/eng.testa'
  test_data_path = '/cs/natlang-data/CoNLL/CoNLL-2003/eng.testb'
  logging.info("loading data and precomputing features...")
  dataset = Dataset(train_data_path, dev_data_path, test_data_path)
  dataset.load_dataset(word_map=wordMap, tag_map=posMap, char_map=None,
                       ner_map=nerMap, prefix_map=pMap, suffix_map=sMap)
  logging.info('test sentences: %d', dataset.get_sent_num('test'))
  logging.info("loading time: %.2f", time.time() - loading_time)
  # num of features for each feature group:
  # capitalization, words, prefix_2, suffix_2, pos_tags
  feature_sizes = [8] * 5
  domain_sizes = [3, dataset.vocabulary_size, dataset.prefix_size,
                  dataset.suffix_size, dataset.pos_classes]
  num_actions = dataset.number_of_classes
  embedding_dims = [8, 64, 16, 16, 16]
  t = time.time()
  hidden_layer_sizes = map(int, FLAGS.hidden_layer_sizes.split(','))
  logging.info('Building training network with parameters: feature_sizes: %s '
               'domain_sizes: %s', feature_sizes, domain_sizes)
  tagger = GreedyTagger(num_actions, feature_sizes, domain_sizes,
                        embedding_dims, hidden_layer_sizes,
                        gate_gradients=True)
  tagger.AddEvaluation(FLAGS.batch_size)
  tagger.AddSaver()
  sess.run(tagger.inits.values())
  tagger.saver.restore(sess, FLAGS.model_path)
  t = time.time()
  num_epochs = None
  num_tokens = 0
  num_correct = 0
  index = 0
  epochs = 0
  epochs, sent_batch = loadBatch(FLAGS.batch_size, epochs, dataset, 'test')
def main(unused_argv):
  logging.set_verbosity(logging.INFO)
  with tf.Session() as sess:
    feature_sizes, domain_sizes, embedding_dims, num_actions = sess.run(
        gen_parser_ops.feature_size(task_context=FLAGS.task_context,
                                    arg_prefix=FLAGS.arg_prefix))
  with tf.Session() as sess:
    EvalForever(sess, num_actions, feature_sizes, domain_sizes,
                embedding_dims)
def set_logger_file(logfile="log.txt"):
  # Attach a rotating file handler to tf.logging here.
  from logging.handlers import RotatingFileHandler
  formatter = tf.logging._logging.Formatter(
      '%(asctime)s %(filename)s:%(lineno)s [%(levelname)s] %(message)s')
  file_handler = RotatingFileHandler(logfile, maxBytes=50 * 1024 * 1024,
                                     backupCount=5)
  file_handler.setFormatter(formatter)
  tf.logging._logger.addHandler(file_handler)
  tf_logging.set_verbosity(tf.logging.INFO)
def train(self): g = tf.Graph() logging.set_verbosity(10) with g.as_default(): # Load datasets. images, *datas = self._get_data() images /= 255. # Define model graph. with tf.variable_scope('net'): with slim.arg_scope([slim.batch_norm, slim.layers.dropout], is_training=True): predictions, states = self._build_network(images, datas) # custom losses self._build_losses(predictions, states, images, datas) # total losses total_loss = slim.losses.get_total_loss() tf.summary.scalar('losses/total loss', total_loss) # image summaries self._build_summaries(predictions, states, images, datas) # learning rate decay global_step = slim.get_or_create_global_step() learning_rate = tf.train.exponential_decay( FLAGS.initial_learning_rate, global_step, FLAGS.learning_rate_decay_step / FLAGS.batch_size, FLAGS.learning_rate_decay_factor, staircase=True) tf.summary.scalar('learning rate', learning_rate) optimizer = tf.train.AdamOptimizer(learning_rate) with tf.Session(graph=g) as sess: init_fn = self._build_restore_fn(sess) train_op = slim.learning.create_train_op(total_loss, optimizer, summarize_gradients=True) logging.set_verbosity(1) slim.learning.train(train_op, FLAGS.train_dir, save_summaries_secs=60, init_fn=init_fn, save_interval_secs=600)
def main(unused_argv=None): logdir = os.path.expanduser(FLAGS.logdir) event_file = os.path.expanduser(FLAGS.event_file) if FLAGS.debug: logging.set_verbosity(logging.DEBUG) logging.info('TensorBoard is in debug mode.') if FLAGS.inspect: logging.info('Not bringing up TensorBoard, but inspecting event files.') efi.inspect(logdir, event_file, FLAGS.tag) return 0 if not logdir: msg = ('A logdir must be specified. Run `tensorboard --help` for ' 'details and examples.') logging.error(msg) print(msg) return -1 logging.info('Starting TensorBoard in directory %s', os.getcwd()) path_to_run = server.ParseEventFilesSpec(logdir) logging.info('TensorBoard path_to_run is: %s', path_to_run) multiplexer = event_multiplexer.EventMultiplexer( size_guidance=server.TENSORBOARD_SIZE_GUIDANCE, purge_orphaned_data=FLAGS.purge_orphaned_data) server.StartMultiplexerReloadingThread(multiplexer, path_to_run, FLAGS.reload_interval) try: tb_server = server.BuildServer(multiplexer, FLAGS.host, FLAGS.port) except socket.error: if FLAGS.port == 0: msg = 'Unable to find any open ports.' logging.error(msg) print(msg) return -2 else: msg = 'Tried to connect to port %d, but address is in use.' % FLAGS.port logging.error(msg) print(msg) return -3 try: tag = resource_loader.load_resource('tensorboard/TAG').strip() logging.info('TensorBoard is tag: %s', tag) except IOError: logging.info('Unable to read TensorBoard tag') tag = '' status_bar.SetupStatusBarInsideGoogle('TensorBoard %s' % tag, FLAGS.port) print('Starting TensorBoard %s on port %d' % (tag, FLAGS.port)) print('(You can navigate to http://%s:%d)' % (FLAGS.host, FLAGS.port)) tb_server.serve_forever()
def main(unused_argv=None): if FLAGS.debug: logging.set_verbosity(logging.DEBUG) logging.info('TensorBoard is in debug mode.') if FLAGS.inspect: logging.info( 'Not bringing up TensorBoard, but inspecting event files.') efi.inspect(logdir=FLAGS.logdir, event_file=FLAGS.event_file, tag=FLAGS.tag) return 0 if not FLAGS.logdir: msg = ('A logdir must be specified. Run `tensorboard --help` for ' 'details and examples.') logging.error(msg) print(msg) return -1 logging.info('Starting TensorBoard in directory %s', os.getcwd()) path_to_run = server.ParseEventFilesSpec(FLAGS.logdir) logging.info('TensorBoard path_to_run is: %s', path_to_run) multiplexer = event_multiplexer.EventMultiplexer( size_guidance=server.TENSORBOARD_SIZE_GUIDANCE, purge_orphaned_data=FLAGS.purge_orphaned_data) server.StartMultiplexerReloadingThread(multiplexer, path_to_run, FLAGS.reload_interval) try: tb_server = server.BuildServer(multiplexer, FLAGS.host, FLAGS.port) except socket.error: if FLAGS.port == 0: msg = 'Unable to find any open ports.' logging.error(msg) print(msg) return -2 else: msg = 'Tried to connect to port %d, but address is in use.' % FLAGS.port logging.error(msg) print(msg) return -3 try: tag = resource_loader.load_resource('tensorboard/TAG').strip() logging.info('TensorBoard is tag: %s', tag) except IOError: logging.warning('Unable to read TensorBoard tag') tag = '' status_bar.SetupStatusBarInsideGoogle('TensorBoard %s' % tag, FLAGS.port) print('Starting TensorBoard %s on port %d' % (tag, FLAGS.port)) print('(You can navigate to http://%s:%d)' % (FLAGS.host, FLAGS.port)) tb_server.serve_forever()
def train(): g = tf.Graph() with g.as_default(): # Load dataset. provider = data_provider.AFLW() images, normals, segmentation = provider.get('normals', 'segmentation') # Define model graph. with tf.variable_scope('net'): with slim.arg_scope([slim.batch_norm, slim.layers.dropout], is_training=True): prediction, pyramid = face_model.multiscale_seg_net(images) # Add a smoothed l1 loss to every scale and the combined output. # for net in [prediction] + pyramid: # loss = losses.smooth_l1(net, normals) # slim.losses.add_loss(loss) for net in [prediction] + pyramid: onehot_labels = tf.reshape(segmentation, (-1, 1)) onehot_labels = tf.concat(1, [1 - onehot_labels, onehot_labels]) net = tf.reshape(net, (-1, 2)) slim.losses.softmax_cross_entropy(net, onehot_labels) total_loss = slim.losses.get_total_loss() tf.scalar_summary('losses/total loss', total_loss) optimizer = tf.train.AdamOptimizer(FLAGS.initial_learning_rate) with tf.Session(graph=g) as sess: saver = tf.train.Saver([ v for v in tf.trainable_variables() if 'seg' not in v.name and 'nrm' not in v.name ]) if FLAGS.pretrained_model_checkpoint_path: saver.restore(sess, FLAGS.pretrained_model_checkpoint_path) # train_op, train_step_fn = utils.create_train_op( # total_loss, optimizer, iter_step=30, summarize_gradients=True) train_op = slim.learning.create_train_op(total_loss, optimizer, summarize_gradients=True) logging.set_verbosity(1) slim.learning.train( train_op, FLAGS.train_dir, # train_step_fn=train_step_fn, save_summaries_secs=60, save_interval_secs=600)
def _run_graph(self, device, output_shape, variable, num_outputs, axis): """Run the graph and print its execution time. Args: device: string, the device to run on. output_shape: shape of each output tensors. variable: whether or not the output shape should be fixed num_outputs: the number of outputs to split the input into axis: axis to be split Returns: The duration of the run in seconds. """ graph = ops.Graph() with graph.as_default(): if not variable: if axis == 0: input_shape = [ output_shape[0] * num_outputs, output_shape[1] ] sizes = [output_shape[0] for _ in range(num_outputs)] else: input_shape = [ output_shape[0], output_shape[1] * num_outputs ] sizes = [output_shape[1] for _ in range(num_outputs)] else: sizes = np.random.randint(low=max(1, output_shape[axis] - 2), high=output_shape[axis] + 2, size=num_outputs) total_size = np.sum(sizes) if axis == 0: input_shape = [total_size, output_shape[1]] else: input_shape = [output_shape[0], total_size] outputs = build_graph(device, input_shape, sizes, axis) config = config_pb2.ConfigProto(graph_options=config_pb2.GraphOptions( optimizer_options=config_pb2.OptimizerOptions( opt_level=config_pb2.OptimizerOptions.L0))) with session_lib.Session(graph=graph, config=config) as session: logging.set_verbosity("info") variables.global_variables_initializer().run() bench = benchmark.TensorFlowBenchmark() bench.run_op_benchmark(session, outputs, mbs=input_shape[0] * input_shape[1] * 4 * 2 * 100 / 1e6, extras={ "input_shape": input_shape, "variable": variable, "axis": axis })
def train(): g = tf.Graph() with g.as_default(): # load data get iterator data_loader = data_utils_mean.DataLoader(SEQUENCE_LENGTH, BATCH_SIZE, NUM_EPOCHS) iterator = data_loader.load_data(TRAIN_TFR_PATH, True) with tf.Session(graph=g) as sess: frameNo, image, label = iterator.get_next() # VGG FACE network VGGFace_network = vgg_face.VGGFace(SEQUENCE_LENGTH * BATCH_SIZE) image_batch = reshape_to_cnn(image) VGGFace_network.setup(image_batch, trainable=False) face_output = VGGFace_network.get_face_fc0() # RNN part rnn_in = reshape_to_rnn(face_output) prediction = models.get_prediction_lstm(rnn_in) prediction = tf.reshape(prediction, [BATCH_SIZE, SEQUENCE_LENGTH, 2]) label_batch = tf.reshape(label, [BATCH_SIZE, SEQUENCE_LENGTH, 2]) # compute losses using slim compute_loss(prediction, label_batch) total_loss = slim.losses.get_total_loss() optimizer = tf.train.AdamOptimizer(LEARNING_RATE) # restore VGG-FACE model at the beginning restore_names = VGGFace_network.get_restore_vars() variables_to_restore = tf.contrib.framework.get_variables_to_restore(include=restore_names) init_fn = slim.assign_from_checkpoint_fn(VGG_RESTORE_PATH, variables_to_restore, ignore_missing_vars=False) # summarize_gradients : Whether or not add summaries for each gradient. # variables_to_train: an optional list of variables to train. If None, it will default to all tf.trainable_variables(). train_op = slim.learning.create_train_op(total_loss, optimizer, summarize_gradients=True # Whether or not add summaries for each gradient. ) loggingTF.set_verbosity(1) # keep 10000 ckpts saver = tf.train.Saver(max_to_keep=10000) # including initialize local and global variables slim.learning.train(train_op, TRAIN_DIR, init_fn=init_fn, save_summaries_secs=60 * 15, # How often, in seconds, to save summaries. log_every_n_steps=500, # The frequency, in terms of global steps, that the loss and global step are logged. save_interval_secs=60 * 15, # How often, in seconds, to save the model to `logdir`. saver=saver )
def train(data_folder):
  tf.set_random_seed(1)
  g = tf.Graph()
  with g.as_default():
    # Load dataset.
    audio_frames, ground_truth, _ = data_provider(
        data_folder, True, 'train', FLAGS.batch_size,
        seq_length=FLAGS.seq_length)

    # Define model graph.
    with slim.arg_scope([slim.layers.batch_norm, slim.layers.dropout],
                        is_training=True):
      prediction = models.get_model(FLAGS.model)(
          audio_frames, hidden_units=FLAGS.hidden_units)

    for i, name in enumerate(['arousal', 'valence']):  # , 'liking']):
      pred_single = tf.reshape(prediction[:, :, i], (-1,))
      gt_single = tf.reshape(ground_truth[:, :, i], (-1,))

      loss = losses.concordance_cc(pred_single, gt_single)
      tf.summary.scalar('losses/{} loss'.format(name), loss)

      mse = tf.reduce_mean(tf.square(pred_single - gt_single))
      tf.summary.scalar('losses/mse {} loss'.format(name), mse)

      tf.losses.add_loss(loss / 2.)

    #print(tf.get_collection(tf.GraphKeys.UPDATE_OPS))
    total_loss = tf.losses.get_total_loss()
    tf.summary.scalar('losses/total loss', total_loss)

    optimizer = tf.train.AdamOptimizer(FLAGS.initial_learning_rate,
                                       beta1=0.9, beta2=0.99)

  with tf.Session(graph=g) as sess:
    train_op = slim.learning.create_train_op(total_loss, optimizer,
                                             summarize_gradients=True)
    logging.set_verbosity(1)
    slim.learning.train(train_op, FLAGS.train_dir,
                        save_summaries_secs=60, save_interval_secs=120)
def _run_graph(self, device, output_shape, variable, num_outputs, axis): """Run the graph and print its execution time. Args: device: string, the device to run on. output_shape: shape of each output tensors. variable: whether or not the output shape should be fixed num_outputs: the number of outputs to split the input into axis: axis to be split Returns: The duration of the run in seconds. """ graph = tf.Graph() with graph.as_default(): if not variable: if axis == 0: input_shape = [output_shape[0] * num_outputs, output_shape[1]] sizes = [output_shape[0] for _ in range(num_outputs)] else: input_shape = [output_shape[0], output_shape[1] * num_outputs] sizes = [output_shape[1] for _ in range(num_outputs)] else: sizes = np.random.randint( low=max(1, output_shape[axis] - 2), high=output_shape[axis] + 2, size=num_outputs) total_size = np.sum(sizes) if axis == 0: input_shape = [total_size, output_shape[1]] else: input_shape = [output_shape[0], total_size] outputs = build_graph(device, input_shape, sizes, axis) config = tf.ConfigProto(graph_options=tf.GraphOptions( optimizer_options=tf.OptimizerOptions( opt_level=tf.OptimizerOptions.L0))) with tf.Session(graph=graph, config=config) as session: logging.set_verbosity("info") tf.global_variables_initializer().run() bench = benchmark.TensorFlowBenchmark() bench.run_op_benchmark( session, outputs, mbs=input_shape[0] * input_shape[1] * 4 * 2 * 100 / 1e6, extras={ "input_shape": input_shape, "variable": variable, "axis": axis })
def train(): g = tf.Graph() with g.as_default(): # Load datasets. provider = DatasetMixer(('BaselNormals', 'MeinNormals')) images, normals, mask = provider.get('normals/mask') # Define model graph. with tf.variable_scope('net'): with slim.arg_scope([slim.batch_norm, slim.layers.dropout], is_training=True): scales = [1, 2, 4] prediction, pyramid = resnet_model.multiscale_nrm_net(images, scales=scales) # Add a cosine loss to every scale and the combined output. for net, level_name in zip([prediction] + pyramid, ['pred'] + scales): loss = losses.cosine_loss(net, normals, mask) tf.scalar_summary('losses/loss at {}'.format(level_name), loss) total_loss = slim.losses.get_total_loss() tf.scalar_summary('losses/total loss', total_loss) optimizer = tf.train.AdamOptimizer(FLAGS.initial_learning_rate) config = tf.ConfigProto(inter_op_parallelism_threads=2) with tf.Session(graph=g, config=config) as sess: init_fn = None if FLAGS.pretrained_resnet_checkpoint_path: init_fn = restore_resnet(sess, FLAGS.pretrained_resnet_checkpoint_path) if FLAGS.pretrained_model_checkpoint_path: print('Loading whole model...') variables_to_restore = slim.get_model_variables() init_fn = slim.assign_from_checkpoint_fn( FLAGS.pretrained_model_checkpoint_path, variables_to_restore, ignore_missing_vars=True) train_op = slim.learning.create_train_op(total_loss, optimizer, summarize_gradients=True) logging.set_verbosity(1) slim.learning.train(train_op, FLAGS.train_dir, save_summaries_secs=60, init_fn=init_fn, save_interval_secs=600)
def train(): g = tf.Graph() with g.as_default(): # Load datasets. provider = data_provider.Deblurring() images, deblurred = provider.get('deblurred') # Define model graph. with tf.variable_scope('net'): with slim.arg_scope([slim.batch_norm, slim.layers.dropout], is_training=True): scales = [1, 2, 4] prediction, pyramid = resnet_model.multiscale_deblurring_net( images, scales=scales) # Add a cosine loss to every scale and the combined output. for net, level_name in zip([prediction] + pyramid, ['pred'] + scales): loss = losses.smooth_l1(net, deblurred) slim.losses.add_loss(loss) tf.scalar_summary('losses/loss at {}'.format(level_name), loss) total_loss = slim.losses.get_total_loss() tf.scalar_summary('losses/total loss', total_loss) tf.image_summary('blurred', images) tf.image_summary('deblurred', deblurred) tf.image_summary('pred', prediction) optimizer = tf.train.AdamOptimizer(FLAGS.initial_learning_rate) with tf.Session(graph=g) as sess: if FLAGS.pretrained_resnet_checkpoint_path: restore_resnet(sess, FLAGS.pretrained_resnet_checkpoint_path) if FLAGS.pretrained_model_checkpoint_path: variables_to_restore = slim.get_variables_to_restore() saver = tf.train.Saver(variables_to_restore) saver.restore(sess, FLAGS.pretrained_model_checkpoint_path) train_op = slim.learning.create_train_op(total_loss, optimizer, summarize_gradients=True) logging.set_verbosity(1) slim.learning.train(train_op, FLAGS.train_dir, save_summaries_secs=60, save_interval_secs=600)
def main(unused_argv):
  logging.set_verbosity(logging.INFO)
  if not gfile.IsDirectory(OutputPath('')):
    gfile.MakeDirs(OutputPath(''))
  #bpe = BPE(codecs.open("code-file", encoding='utf-8'), "@@")
  wordMapPath = "word-map"
  tagMapPath = "tag-map"
  pMapPath = "prefix-list"
  sMapPath = "suffix-list"
  pMap = readAffix(pMapPath)
  sMap = readAffix(sMapPath)
  wordMap = readMap(wordMapPath)
  tagMap = readMap(tagMapPath)
  wordMap.insert(0, "-start-")
  wordMap.insert(0, "-end-")
  wordMap.insert(0, "-unknown-")
  pMap.insert(0, "-start-")
  pMap.insert(0, "-unknown-")
  sMap.insert(0, "-start-")
  sMap.insert(0, "-unknown-")
  # num of features for each feature group:
  # capitalization, words, other, prefix_2, suffix_2, previous_tags
  feature_sizes = [8, 8, 2, 8, 8, 4]
  domain_sizes = [3, len(wordMap) + 3, 3, len(pMap) + 2, len(sMap) + 2,
                  len(tagMap) + 1]
  num_actions = 45
  embedding_dims = [8, 64, 8, 16, 16, 16]
  train_data_path = '/cs/natlang-user/vivian/wsj-conll/train.conllu'
  dev_data_path = '/cs/natlang-user/vivian/wsj-conll/dev.conllu'
  logging.info("loading data and precomputing features...")
  train_data = ConllData(train_data_path, wordMap, tagMap, pMap, sMap)
  dev_data = ConllData(dev_data_path, wordMap, tagMap, pMap, sMap)
  with tf.Session(FLAGS.tf_master) as sess:
    Train(sess, num_actions, feature_sizes, domain_sizes, embedding_dims,
          wordMap, tagMap, pMap, sMap, train_data, dev_data)
def main(unused_argv):
  logging.set_verbosity(logging.INFO)
  with tf.Session() as sess:
    src = gen_parser_ops.document_source(batch_size=32,
                                         corpus_name=FLAGS.corpus_name,
                                         task_context=FLAGS.task_context)
    sentence = sentence_pb2.Sentence()
    while True:
      documents, finished = sess.run(src)
      logging.info('Read %d documents', len(documents))
      for d in documents:
        sentence.ParseFromString(d)
        d = to_dict(sentence)
        print json.dumps(d)
      if finished:
        break
def train():
  record_names = ["sudoku_train"] * 50 + ["mnist_train"]
  record_paths = ["data/{}.tfrecords".format(x) for x in record_names]

  # Load data in batches.
  images, labels = data_provider.get_data(record_paths,
                                          batch_size=FLAGS.batch_size,
                                          is_training=True)

  # Define network
  with slim.arg_scope([slim.layers.dropout, slim.batch_norm],
                      is_training=True):
    predictions = model(images)

  # Display images to tensorboard
  tf.image_summary('images', images, max_images=5)

  # Define loss function
  slim.losses.softmax_cross_entropy(predictions, labels)
  total_loss = slim.losses.get_total_loss()
  tf.scalar_summary('loss', total_loss)

  # Create learning rate decay
  global_step = slim.get_or_create_global_step()
  learning_rate = tf.train.exponential_decay(FLAGS.initial_learning_rate,
                                             global_step=global_step,
                                             decay_steps=FLAGS.decay_steps,
                                             decay_rate=FLAGS.decay_rate)

  # Optimizer to use.
  optimizer = tf.train.AdamOptimizer(learning_rate)

  # Create training operation
  train_op = slim.learning.create_train_op(total_loss, optimizer)

  logging.set_verbosity(1)
  # Start training
  slim.learning.train(train_op, FLAGS.log_dir,
                      save_summaries_secs=20,
                      save_interval_secs=60,
                      log_every_n_steps=100)
def set_up_logging(
    log_path=None,
    level=logging.INFO,
    formatter='%(asctime)s - %(name)s - %(levelname)s - %(message)s'):
  tf_logging.set_verbosity(level)
  # create file handler which logs even debug messages
  if log_path:
    try:
      fh = logging.FileHandler(log_path)
      fh.setLevel(level)
      fh.setFormatter(logging.Formatter(formatter))
      tf_logger = tf_logging.get_logger()
      tf_logger.addHandler(fh)
      tf_logger.info('Saving logs to "%s"' % log_path)
      tf_logger.propagate = False
    except FileNotFoundError:
      tf.logging.info('Cannot save logs to file in Cloud ML Engine')
def main(unused_argv):
  logging.set_verbosity(logging.INFO)
  if not gfile.IsDirectory(OutputPath('')):
    gfile.MakeDirs(OutputPath(''))

  # Rewrite context.
  RewriteContext()

  # Creates necessary term maps.
  if FLAGS.compute_lexicon:
    logging.info('Computing lexicon...')
    with tf.Session(FLAGS.tf_master) as sess:
      gen_parser_ops.lexicon_builder(task_context=OutputPath('context'),
                                     corpus_name=FLAGS.training_corpus).run()
  with tf.Session(FLAGS.tf_master) as sess:
    feature_sizes, domain_sizes, embedding_dims, num_actions = sess.run(
        gen_parser_ops.feature_size(task_context=OutputPath('context'),
                                    arg_prefix=FLAGS.arg_prefix))

  # Well formed and projectivize.
  if FLAGS.projectivize_training_set:
    logging.info('Preprocessing...')
    with tf.Session(FLAGS.tf_master) as sess:
      source, last = gen_parser_ops.document_source(
          task_context=OutputPath('context'),
          batch_size=FLAGS.batch_size,
          corpus_name=FLAGS.training_corpus)
      sink = gen_parser_ops.document_sink(
          task_context=OutputPath('context'),
          corpus_name='projectivized-training-corpus',
          documents=gen_parser_ops.projectivize_filter(
              gen_parser_ops.well_formed_filter(
                  source, task_context=OutputPath('context')),
              task_context=OutputPath('context')))
      while True:
        tf_last, _ = sess.run([last, sink])
        if tf_last:
          break

  logging.info('Training...')
  with tf.Session(FLAGS.tf_master) as sess:
    Train(sess, num_actions, feature_sizes, domain_sizes, embedding_dims)
def serve(self): """Starts a WSGI server that serves the TensorBoard app.""" tb_app = self.create_app() logging.info('Starting TensorBoard in directory %s', os.getcwd()) debug = FLAGS.insecure_debug_mode if debug: logging.set_verbosity(logging.DEBUG) logging.warning('TensorBoard is in debug mode. This is NOT SECURE.') print('Starting TensorBoard %s on port %d' % (self.get_tag(), FLAGS.port)) if FLAGS.host == '0.0.0.0': try: host = socket.gethostbyname(socket.gethostname()) print('(You can navigate to http://%s:%d)' % (host, FLAGS.port)) except socket.gaierror: pass else: print('(You can navigate to http://%s:%d)' % (FLAGS.host, FLAGS.port)) try: serving.run_simple( FLAGS.host, FLAGS.port, tb_app, threaded=True, use_reloader=debug, use_evalex=debug, use_debugger=debug) except socket.error: if FLAGS.port == 0: msg = 'Unable to find any open ports.' logging.error(msg) print(msg) return -2 else: msg = 'Tried to connect to port %d, but address is in use.' % FLAGS.port logging.error(msg) print(msg) return -3
def main(unused_argv):
  logging.set_verbosity(logging.INFO)
  with tf.Session() as sess:
    src = gen_parser_ops.document_source(batch_size=32,
                                         corpus_name=FLAGS.corpus_name,
                                         task_context=FLAGS.task_context)
    sentence = sentence_pb2.Sentence()
    while True:
      documents, finished = sess.run(src)
      logging.info('Read %d documents', len(documents))
      for d in documents:
        sentence.ParseFromString(d)
        tr = asciitree.LeftAligned()
        d = to_dict(sentence)
        print('Input: %s' % sentence.text)
        print('Parse:')
        tr_str = tr(d)
        pat = re.compile(r'\s*@\d+$')
        for tr_ln in tr_str.splitlines():
          print(pat.sub('', tr_ln))
      if finished:
        break
def main(unused_argv):
  logging.set_verbosity(logging.INFO)
  with tf.Session() as sess:
    src = gen_parser_ops.document_source(batch_size=32,
                                         corpus_name=FLAGS.corpus_name,
                                         task_context=FLAGS.task_context)
    sentence = sentence_pb2.Sentence()
    while True:
      documents, finished = sess.run(src)
      logging.info('Read %d documents', len(documents))
      for d in documents:
        sentence.ParseFromString(d)
        # tr = asciitree.LeftAligned()
        d = to_dict(sentence)
        print 'Input: %s' % sentence.text
        print 'Parse:'
        print json.dumps(d, indent=True)
        #dom = parseString(dicttoxml.dicttoxml(d, attr_type=False))
        #print dom.toprettyxml()
        #print dicttoxml.dicttoxml(d)
      if finished:
        break
def main(unused_argv):
  logging.set_verbosity(logging.DEBUG)
  with tf.Session() as sess:
    src = gen_parser_ops.document_source(batch_size=32,
                                         corpus_name=FLAGS.corpus_name,
                                         task_context=FLAGS.task_context)
    sentence = sentence_pb2.Sentence()
    while True:
      documents, finished = sess.run(src)
      logging.info('Read %d documents', len(documents))
      for d in documents:
        sentence.ParseFromString(d)
        print formatter.format(sentence)
        # d_raw = d
        # sentence.ParseFromString(d)
        # tr = asciitree.LeftAligned()
        # d = to_dict(sentence)
        # print 'Input: %s' % sentence.text
        # print 'Parse:'
        # print tr(d)
        # print d_raw
      if finished:
        break
def main(unused_argv): logging.set_verbosity(logging.INFO) if not gfile.IsDirectory(FLAGS.resource_path): gfile.MakeDirs(FLAGS.resource_path) # Constructs lexical resources for SyntaxNet in the given resource path, from # the training data. if FLAGS.compute_lexicon: logging.info('Computing lexicon...') lexicon.build_lexicon(FLAGS.resource_path, FLAGS.training_corpus_path) # Construct the "lookahead" ComponentSpec. This is a simple right-to-left RNN # sequence model, which encodes the context to the right of each token. It has # no loss except for the downstream components. lookahead = spec_builder.ComponentSpecBuilder('lookahead') lookahead.set_network_unit( name='wrapped_units.LayerNormBasicLSTMNetwork', hidden_layer_sizes='256') lookahead.set_transition_system(name='shift-only', left_to_right='false') lookahead.add_fixed_feature(name='char', fml='input(-1).char input.char input(1).char', embedding_dim=32) lookahead.add_fixed_feature(name='char-bigram', fml='input.char-bigram', embedding_dim=32) lookahead.fill_from_resources(FLAGS.resource_path, FLAGS.tf_master) # Construct the ComponentSpec for segmentation. segmenter = spec_builder.ComponentSpecBuilder('segmenter') segmenter.set_network_unit( name='wrapped_units.LayerNormBasicLSTMNetwork', hidden_layer_sizes='128') segmenter.set_transition_system(name='binary-segment-transitions') segmenter.add_token_link( source=lookahead, fml='input.focus stack.focus', embedding_dim=64) segmenter.fill_from_resources(FLAGS.resource_path, FLAGS.tf_master) # Build and write master_spec. master_spec = spec_pb2.MasterSpec() master_spec.component.extend([lookahead.spec, segmenter.spec]) logging.info('Constructed master spec: %s', str(master_spec)) with gfile.GFile(FLAGS.resource_path + '/master_spec', 'w') as f: f.write(str(master_spec).encode('utf-8')) hyperparam_config = spec_pb2.GridPoint() try: text_format.Parse(FLAGS.hyperparams, hyperparam_config) except text_format.ParseError: text_format.Parse(base64.b64decode(FLAGS.hyperparams), hyperparam_config) # Build the TensorFlow graph. graph = tf.Graph() with graph.as_default(): builder = graph_builder.MasterBuilder(master_spec, hyperparam_config) component_targets = spec_builder.default_targets_from_spec(master_spec) trainers = [ builder.add_training_from_config(target) for target in component_targets ] assert len(trainers) == 1 annotator = builder.add_annotation() builder.add_saver() # Read in serialized protos from training data. training_set = ConllSentenceReader( FLAGS.training_corpus_path, projectivize=False).corpus() dev_set = ConllSentenceReader( FLAGS.dev_corpus_path, projectivize=False).corpus() # Convert word-based docs to char-based documents for segmentation training # and evaluation. with tf.Session(graph=tf.Graph()) as tmp_session: char_training_set_op = gen_parser_ops.segmenter_training_data_constructor( training_set) char_dev_set_op = gen_parser_ops.char_token_generator(dev_set) char_training_set = tmp_session.run(char_training_set_op) char_dev_set = tmp_session.run(char_dev_set_op) # Ready to train! logging.info('Training on %d sentences.', len(training_set)) logging.info('Tuning on %d sentences.', len(dev_set)) pretrain_steps = [0] train_steps = [FLAGS.num_epochs * len(training_set)] tf.logging.info('Creating TensorFlow checkpoint dir...') gfile.MakeDirs(os.path.dirname(FLAGS.checkpoint_filename)) summary_writer = trainer_lib.get_summary_writer(FLAGS.tensorboard_dir) with tf.Session(FLAGS.tf_master, graph=graph) as sess: # Make sure to re-initialize all underlying state. 
sess.run(tf.global_variables_initializer()) trainer_lib.run_training( sess, trainers, annotator, evaluation.segmentation_summaries, pretrain_steps, train_steps, char_training_set, char_dev_set, dev_set, FLAGS.batch_size, summary_writer, FLAGS.report_every, builder.saver, FLAGS.checkpoint_filename)
def main(unused_argv): logging.set_verbosity(logging.INFO) if not gfile.IsDirectory(FLAGS.resource_path): gfile.MakeDirs(FLAGS.resource_path) # Constructs lexical resources for SyntaxNet in the given resource path, from # the training data. if FLAGS.compute_lexicon: logging.info('Computing lexicon...') lexicon.build_lexicon(FLAGS.resource_path, FLAGS.training_corpus_path) # Construct the "lookahead" ComponentSpec. This is a simple right-to-left RNN # sequence model, which encodes the context to the right of each token. It has # no loss except for the downstream components. char2word = spec_builder.ComponentSpecBuilder('char_lstm') char2word.set_network_unit( name='wrapped_units.LayerNormBasicLSTMNetwork', hidden_layer_sizes='256') char2word.set_transition_system(name='char-shift-only', left_to_right='true') char2word.add_fixed_feature(name='chars', fml='char-input.text-char', embedding_dim=16) char2word.fill_from_resources(FLAGS.resource_path, FLAGS.tf_master) lookahead = spec_builder.ComponentSpecBuilder('lookahead') lookahead.set_network_unit( name='wrapped_units.LayerNormBasicLSTMNetwork', hidden_layer_sizes='256') lookahead.set_transition_system(name='shift-only', left_to_right='false') lookahead.add_link(source=char2word, fml='input.last-char-focus', embedding_dim=32) lookahead.fill_from_resources(FLAGS.resource_path, FLAGS.tf_master) # Construct the ComponentSpec for tagging. This is a simple left-to-right RNN # sequence tagger. tagger = spec_builder.ComponentSpecBuilder('tagger') tagger.set_network_unit( name='wrapped_units.LayerNormBasicLSTMNetwork', hidden_layer_sizes='256') tagger.set_transition_system(name='tagger') tagger.add_token_link(source=lookahead, fml='input.focus', embedding_dim=32) tagger.fill_from_resources(FLAGS.resource_path, FLAGS.tf_master) # Construct the ComponentSpec for parsing. parser = spec_builder.ComponentSpecBuilder('parser') parser.set_network_unit(name='FeedForwardNetwork', hidden_layer_sizes='256', layer_norm_hidden='True') parser.set_transition_system(name='arc-standard') parser.add_token_link(source=lookahead, fml='input.focus', embedding_dim=32) parser.add_token_link( source=tagger, fml='input.focus stack.focus stack(1).focus', embedding_dim=32) # Recurrent connection for the arc-standard parser. For both tokens on the # stack, we connect to the last time step to either SHIFT or REDUCE that # token. This allows the parser to build up compositional representations of # phrases. parser.add_link( source=parser, # recurrent connection name='rnn-stack', # unique identifier fml='stack.focus stack(1).focus', # look for both stack tokens source_translator='shift-reduce-step', # maps token indices -> step embedding_dim=32) # project down to 32 dims parser.fill_from_resources(FLAGS.resource_path, FLAGS.tf_master) master_spec = spec_pb2.MasterSpec() master_spec.component.extend([char2word.spec, lookahead.spec, tagger.spec, parser.spec]) logging.info('Constructed master spec: %s', str(master_spec)) hyperparam_config = spec_pb2.GridPoint() hyperparam_config.decay_steps = 128000 hyperparam_config.learning_rate = 0.001 hyperparam_config.learning_method = 'adam' hyperparam_config.adam_beta1 = 0.9 hyperparam_config.adam_beta2 = 0.9 hyperparam_config.adam_eps = 0.0001 hyperparam_config.gradient_clip_norm = 1 hyperparam_config.self_norm_alpha = 1.0 hyperparam_config.use_moving_average = True hyperparam_config.dropout_rate = 0.7 hyperparam_config.seed = 1 # Build the TensorFlow graph. 
graph = tf.Graph() with graph.as_default(): builder = graph_builder.MasterBuilder(master_spec, hyperparam_config) component_targets = spec_builder.default_targets_from_spec(master_spec) trainers = [ builder.add_training_from_config(target) for target in component_targets ] assert len(trainers) == 2 annotator = builder.add_annotation() builder.add_saver() # Read in serialized protos from training data. training_set = sentence_io.ConllSentenceReader( FLAGS.training_corpus_path, projectivize=FLAGS.projectivize_training_set).corpus() dev_set = sentence_io.ConllSentenceReader( FLAGS.dev_corpus_path, projectivize=False).corpus() # Ready to train! logging.info('Training on %d sentences.', len(training_set)) logging.info('Tuning on %d sentences.', len(dev_set)) pretrain_steps = [100, 0] tagger_steps = 1000 train_steps = [tagger_steps, 8 * tagger_steps] tf.logging.info('Creating TensorFlow checkpoint dir...') gfile.MakeDirs(os.path.dirname(FLAGS.checkpoint_filename)) summary_writer = trainer_lib.get_summary_writer(FLAGS.tensorboard_dir) with tf.Session(FLAGS.tf_master, graph=graph) as sess: # Make sure to re-initialize all underlying state. sess.run(tf.global_variables_initializer()) trainer_lib.run_training( sess, trainers, annotator, evaluation.parser_summaries, pretrain_steps, train_steps, training_set, dev_set, dev_set, FLAGS.batch_size, summary_writer, FLAGS.report_every, builder.saver, FLAGS.checkpoint_filename)
def main(unused_argv):
  logging.set_verbosity(logging.INFO)
  with tf.Session() as sess:
    Eval(sess)
def main(unused_argv=None): debug = FLAGS.insecure_debug_mode logdir = os.path.expanduser(FLAGS.logdir) if debug: logging.set_verbosity(logging.DEBUG) logging.warning('TensorBoard is in debug mode. This is NOT SECURE.') if FLAGS.inspect: logging.info('Not bringing up TensorBoard, but inspecting event files.') event_file = os.path.expanduser(FLAGS.event_file) efi.inspect(logdir, event_file, FLAGS.tag) return 0 if not logdir: msg = ('A logdir must be specified. Run `tensorboard --help` for ' 'details and examples.') logging.error(msg) print(msg) return -1 logging.info('Starting TensorBoard in directory %s', os.getcwd()) plugins = {'projector': projector_plugin.ProjectorPlugin()} tb_app = application.TensorBoardWSGIApp( logdir, plugins, purge_orphaned_data=FLAGS.purge_orphaned_data, reload_interval=FLAGS.reload_interval) try: tag = resource_loader.load_resource('tensorboard/TAG').strip() logging.info('TensorBoard is tag: %s', tag) except IOError: logging.info('Unable to read TensorBoard tag') tag = '' status_bar.SetupStatusBarInsideGoogle('TensorBoard %s' % tag, FLAGS.port) print('Starting TensorBoard %s on port %d' % (tag, FLAGS.port)) if FLAGS.host == "0.0.0.0": try: host = socket.gethostbyname(socket.gethostname()) print('(You can navigate to http://%s:%d)' % (host, FLAGS.port)) except socket.gaierror: pass else: print('(You can navigate to http://%s:%d)' % (FLAGS.host, FLAGS.port)) try: serving.run_simple( FLAGS.host, FLAGS.port, tb_app, threaded=True, use_reloader=debug, use_evalex=debug, use_debugger=debug) except socket.error: if FLAGS.port == 0: msg = 'Unable to find any open ports.' logging.error(msg) print(msg) return -2 else: msg = 'Tried to connect to port %d, but address is in use.' % FLAGS.port logging.error(msg) print(msg) return -3
def main(unused_argv): logging.set_verbosity(logging.INFO) check.IsTrue(FLAGS.checkpoint_filename) check.IsTrue(FLAGS.tensorboard_dir) check.IsTrue(FLAGS.resource_path) if not gfile.IsDirectory(FLAGS.resource_path): gfile.MakeDirs(FLAGS.resource_path) training_corpus_path = gfile.Glob(FLAGS.training_corpus_path)[0] tune_corpus_path = gfile.Glob(FLAGS.tune_corpus_path)[0] # SummaryWriter for TensorBoard tf.logging.info('TensorBoard directory: "%s"', FLAGS.tensorboard_dir) tf.logging.info('Deleting prior data if exists...') stats_file = '%s.stats' % FLAGS.checkpoint_filename try: stats = gfile.GFile(stats_file, 'r').readlines()[0].split(',') stats = [int(x) for x in stats] except errors.OpError: stats = [-1, 0, 0] tf.logging.info('Read ckpt stats: %s', str(stats)) do_restore = True if stats[0] < FLAGS.job_id: do_restore = False tf.logging.info('Deleting last job: %d', stats[0]) try: gfile.DeleteRecursively(FLAGS.tensorboard_dir) gfile.Remove(FLAGS.checkpoint_filename) except errors.OpError as err: tf.logging.error('Unable to delete prior files: %s', err) stats = [FLAGS.job_id, 0, 0] tf.logging.info('Creating the directory again...') gfile.MakeDirs(FLAGS.tensorboard_dir) tf.logging.info('Created! Instatiating SummaryWriter...') summary_writer = trainer_lib.get_summary_writer(FLAGS.tensorboard_dir) tf.logging.info('Creating TensorFlow checkpoint dir...') gfile.MakeDirs(os.path.dirname(FLAGS.checkpoint_filename)) # Constructs lexical resources for SyntaxNet in the given resource path, from # the training data. if FLAGS.compute_lexicon: logging.info('Computing lexicon...') lexicon.build_lexicon( FLAGS.resource_path, training_corpus_path, morph_to_pos=True) tf.logging.info('Loading MasterSpec...') master_spec = spec_pb2.MasterSpec() with gfile.FastGFile(FLAGS.dragnn_spec, 'r') as fin: text_format.Parse(fin.read(), master_spec) spec_builder.complete_master_spec(master_spec, None, FLAGS.resource_path) logging.info('Constructed master spec: %s', str(master_spec)) hyperparam_config = spec_pb2.GridPoint() # Build the TensorFlow graph. tf.logging.info('Building Graph...') hyperparam_config = spec_pb2.GridPoint() try: text_format.Parse(FLAGS.hyperparams, hyperparam_config) except text_format.ParseError: text_format.Parse(base64.b64decode(FLAGS.hyperparams), hyperparam_config) g = tf.Graph() with g.as_default(): builder = graph_builder.MasterBuilder(master_spec, hyperparam_config) component_targets = [ spec_pb2.TrainTarget( name=component.name, max_index=idx + 1, unroll_using_oracle=[False] * idx + [True]) for idx, component in enumerate(master_spec.component) if 'shift-only' not in component.transition_system.registered_name ] trainers = [ builder.add_training_from_config(target) for target in component_targets ] annotator = builder.add_annotation() builder.add_saver() # Read in serialized protos from training data. training_set = ConllSentenceReader( training_corpus_path, projectivize=FLAGS.projectivize_training_set, morph_to_pos=True).corpus() tune_set = ConllSentenceReader( tune_corpus_path, projectivize=False, morph_to_pos=True).corpus() # Ready to train! logging.info('Training on %d sentences.', len(training_set)) logging.info('Tuning on %d sentences.', len(tune_set)) pretrain_steps = [10000, 0] tagger_steps = 100000 train_steps = [tagger_steps, 8 * tagger_steps] with tf.Session(FLAGS.tf_master, graph=g) as sess: # Make sure to re-initialize all underlying state. 
sess.run(tf.global_variables_initializer()) if do_restore: tf.logging.info('Restoring from checkpoint...') builder.saver.restore(sess, FLAGS.checkpoint_filename) prev_tagger_steps = stats[1] prev_parser_steps = stats[2] tf.logging.info('adjusting schedule from steps: %d, %d', prev_tagger_steps, prev_parser_steps) pretrain_steps[0] = max(pretrain_steps[0] - prev_tagger_steps, 0) tf.logging.info('new pretrain steps: %d', pretrain_steps[0]) trainer_lib.run_training( sess, trainers, annotator, evaluation.parser_summaries, pretrain_steps, train_steps, training_set, tune_set, tune_set, FLAGS.batch_size, summary_writer, FLAGS.report_every, builder.saver, FLAGS.checkpoint_filename, stats)
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import numpy as np
import tensorflow as tf
from tensorflow.python.platform import tf_logging as logging
from tensorflow.examples.tutorials.mnist import input_data as mnist_data
import argparse
import math
import sys

logging.set_verbosity(logging.INFO)
logging.log(logging.INFO, "Tensorflow version " + tf.__version__)

#
# To run this: see README.md
#

# Called when the model is deployed for online predictions on Cloud ML Engine.
def serving_input_fn():
    inputs = {'image': tf.placeholder(tf.float32, [None, 28, 28])}
    # Here, you can transform the data received from the API call
    features = inputs
    return tf.estimator.export.ServingInputReceiver(features, inputs)

# In memory training data for this simple case.
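# --- Hypothetical usage sketch (not part of the original file) ---
# A serving input function like serving_input_fn above is typically passed to
# Estimator.export_savedmodel (TF 1.x) to produce a SavedModel for Cloud ML
# Engine online prediction. `model_fn` and the 'exported_model' directory here
# are assumed placeholders, not names from the original file.
def _export_for_serving(model_fn):
    estimator = tf.estimator.Estimator(model_fn=model_fn, model_dir='checkpoints')
    estimator.export_savedmodel('exported_model', serving_input_fn)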