def train(args):
    """Trains the model.

    Builds the text-line input pipeline, the analysis / entropy-bottleneck /
    synthesis autoencoder, the rate-distortion loss and two Adam optimizers,
    then runs the grouped training op inside a
    ``tf.train.MonitoredTrainingSession`` until a hook requests a stop.

    Args:
        args: Parsed command-line namespace. This function reads ``verbose``,
            ``train_glob``, ``preprocess_threads``, ``batchsize``, ``lmbda``,
            ``last_step`` and ``checkpoint_dir``.

    Raises:
        RuntimeError: If ``args.train_glob`` matches no files.
    """
    if args.verbose:
        tf.logging.set_verbosity(tf.logging.INFO)

    # Create input data pipeline.
    with tf.device("/cpu:0"):
        # Only the first three matching files are used.
        train_files = glob.glob(args.train_glob)[:3]
        if not train_files:
            raise RuntimeError(
                "No training images found with glob '{}'.".format(
                    args.train_glob))

        # `buffer_size` of TextLineDataset is the read buffer in BYTES, not a
        # file count; passing len(train_files) throttled reads to a <= 3 byte
        # buffer. Leaving it unset selects the library default and yields the
        # same records.
        train_dataset = tf.data.TextLineDataset(
            train_files,
            compression_type=None,
            num_parallel_reads=args.preprocess_threads)
        train_dataset = train_dataset.map(
            string_to_tensor, num_parallel_calls=args.preprocess_threads)
        # NOTE(review): the shuffle buffer counts dataset elements (lines), so
        # a buffer of len(train_files) <= 3 barely shuffles. Left unchanged
        # because the intended window size is not visible here.
        train_dataset = train_dataset.shuffle(
            buffer_size=len(train_files)).repeat()
        train_dataset = train_dataset.batch(args.batchsize)
        train_dataset = train_dataset.prefetch(32)

    # Get training patch from dataset.
    x = train_dataset.make_one_shot_iterator().get_next()

    # Instantiate model.
    analysis_transform = AnalysisTransform(32)
    entropy_bottleneck = tfc.EntropyBottleneck()
    synthesis_transform = SynthesisTransform(32)

    # Build autoencoder.
    y = analysis_transform(x)
    y_tilde, likelihoods = entropy_bottleneck(y, training=True)
    x_tilde = synthesis_transform(y_tilde)

    # Split the reconstruction in two halves along the last axis and constrain
    # each half separately: abs() for timestamps, tanh() for polarities.
    timestamps, polarities = tf.split(x_tilde, num_or_size_splits=2, axis=-1)
    timestamps = tf.math.abs(timestamps)
    polarities = tf.math.tanh(polarities)
    x_tilde = tf.concat([timestamps, polarities], axis=-1)

    # Rate term: -sum(p * log2(p)) over axes 1 and 2, averaged over the rest.
    train_bpp = tf.reduce_mean(
        -tf.reduce_sum(likelihoods * tf.log(likelihoods), axis=[1, 2]) /
        np.log(2))

    # Mean squared error across pixels.
    train_mse = tf.reduce_mean((x - x_tilde)**2.)

    # The rate-distortion cost.
    train_loss = args.lmbda * train_mse + train_bpp

    # Minimize loss and auxiliary loss, and execute update op.
    step = tf.train.create_global_step()
    main_optimizer = tf.train.AdamOptimizer(learning_rate=1e-4)
    main_step = main_optimizer.minimize(train_loss, global_step=step)

    aux_optimizer = tf.train.AdamOptimizer(learning_rate=1e-3)
    aux_step = aux_optimizer.minimize(entropy_bottleneck.losses[0])

    train_op = tf.group(main_step, aux_step, entropy_bottleneck.updates[0])

    tf.summary.scalar("loss", train_loss)
    tf.summary.scalar("bpp", train_bpp)
    tf.summary.scalar("mse", train_mse)

    hooks = [
        tf.train.StopAtStepHook(last_step=args.last_step),
        tf.train.NanTensorHook(train_loss),
    ]
    with tf.train.MonitoredTrainingSession(
            hooks=hooks,
            checkpoint_dir=args.checkpoint_dir,
            save_checkpoint_secs=300,
            save_summaries_secs=60) as sess:
        while not sess.should_stop():
            sess.run(train_op)
def arbitrary_style_image_inputs(style_dataset_file,
                                 batch_size=None,
                                 image_size=None,
                                 center_crop=True,
                                 shuffle=True,
                                 augment_style_images=False,
                                 random_style_image_size=False,
                                 min_rand_image_size=128,
                                 max_rand_image_size=300):
    """Loads a batch of random style images given the path of a tfrecord dataset.

    This method does not return pre-computed Gram matrices for the images like
    style_image_inputs. But it can provide data augmentation. If
    augment_style_images is True, then style images will be randomly modified
    (e.g. changes in brightness, hue or saturation) for data augmentation.
    If random_style_image_size is True then all images in one batch will be
    resized to a random size.

    Args:
        style_dataset_file: str, path to the tfrecord dataset of style files.
        batch_size: int. If provided, batches style images. Defaults to None.
        image_size: int. The images will be resized bilinearly so that the
            smallest side has size image_size. Defaults to None.
        center_crop: bool. If True, center-crops to [image_size, image_size].
            Defaults to True.
        shuffle: bool, whether to shuffle style files at random. Defaults to
            True.
        augment_style_images: bool. Whether to augment style images or not.
            Requires image_size.
        random_style_image_size: bool. If True, all the style images in one
            batch are resized to a random size between min_rand_image_size and
            max_rand_image_size.
        min_rand_image_size: int. Minimum size used when
            random_style_image_size is True.
        max_rand_image_size: int. Maximum size used when
            random_style_image_size is True.

    Returns:
        A tuple ``(image, label, image_orig)``:
        image: float tensor with values in [0, 1] holding the (possibly
            augmented) style image(s). When batch_size is None a leading
            dimension of size 1 is added; otherwise it is batched.
        label: the style label read from the record (batched when batch_size
            is given).
        image_orig: float tensor with values in [0, 1] holding the style
            image without random augmentation. NOTE(review): when batch_size
            is None this tensor is returned without the leading batch
            dimension that `image` receives - confirm callers expect that.

    Raises:
        ValueError: if center cropping is requested but no image size is
            provided, or if both augment-style-images and center-cropping are
            requested, or if augment-style-images is requested without an
            image size, or if batch size is specified but neither
            center-cropping nor augment-style-images is requested.
    """
    if center_crop and image_size is None:
        raise ValueError('center-cropping requires specifying the image size.')
    if center_crop and augment_style_images:
        raise ValueError(
            'When augment_style_images is true images will be randomly cropped.'
        )
    # Augmentation resizes and crops relative to image_size; without it the
    # augmentation used to be skipped silently and the function crashed later.
    if augment_style_images and image_size is None:
        raise ValueError(
            'augment_style_images requires specifying the image size.')
    if batch_size is not None and not center_crop and not augment_style_images:
        raise ValueError(
            'batching requires same image sizes (Set center-cropping or '
            'augment_style_images to true)')

    with tf.name_scope('style_image_processing'):
        # Force all input processing onto CPU in order to reserve the GPU for
        # the forward inference and back-propagation.
        with tf.device('/cpu:0'):
            filename_queue = tf.train.string_input_producer(
                [style_dataset_file],
                shuffle=False,
                capacity=1,
                name='filename_queue')
            if shuffle:
                examples_queue = tf.RandomShuffleQueue(
                    capacity=64,
                    min_after_dequeue=32,
                    dtypes=[tf.string],
                    name='random_examples_queue')
            else:
                examples_queue = tf.FIFOQueue(
                    capacity=64,
                    dtypes=[tf.string],
                    name='fifo_examples_queue')

            # Read serialized examples from the record file into the queue.
            reader = tf.TFRecordReader()
            _, value = reader.read(filename_queue)
            enqueue_ops = [examples_queue.enqueue([value])]
            tf.train.queue_runner.add_queue_runner(
                tf.train.queue_runner.QueueRunner(examples_queue, enqueue_ops))
            example_serialized = examples_queue.dequeue()

            features = tf.parse_single_example(
                example_serialized,
                features={
                    'label': tf.FixedLenFeature([], tf.int64),
                    'image_raw': tf.FixedLenFeature([], tf.string)
                })
            image = tf.image.decode_jpeg(features['image_raw'])
            image.set_shape([None, None, 3])
            label = features['label']

            # Default for the un-augmented image. Previously this name was
            # only bound inside the `image_size is not None` branch, so calls
            # without an image size failed with UnboundLocalError below.
            image_orig = image

            if image_size is not None:
                image_channels = int(image.shape[2])
                if augment_style_images:
                    # image_orig still refers to the decoded, unmodified image.
                    image = tf.image.random_brightness(image, max_delta=0.8)
                    image = tf.image.random_saturation(
                        image, lower=0.5, upper=1.5)
                    image = tf.image.random_hue(image, max_delta=0.2)
                    image = tf.image.random_flip_left_right(image)
                    image = tf.image.random_flip_up_down(image)
                    random_larger_image_size = tf.random_uniform(
                        [],
                        minval=image_size + 2,
                        maxval=image_size + 200,
                        dtype=tf.int32)
                    image = _aspect_preserving_resize(
                        image, random_larger_image_size)
                    image = tf.random_crop(
                        image, size=[image_size, image_size, image_channels])
                    image.set_shape([image_size, image_size, image_channels])

                    image_orig = _aspect_preserving_resize(
                        image_orig, image_size + 2)
                    image_orig = _central_crop(
                        [image_orig], image_size, image_size)[0]
                    image_orig.set_shape([image_size, image_size, 3])
                elif center_crop:
                    image = _aspect_preserving_resize(image, image_size + 2)
                    image = _central_crop([image], image_size, image_size)[0]
                    image.set_shape([image_size, image_size, image_channels])
                    image_orig = image
                else:
                    image = _aspect_preserving_resize(image, image_size)
                    image_orig = image

            # Scale pixel values to [0, 1].
            image = tf.to_float(image) / 255.0
            image_orig = tf.to_float(image_orig) / 255.0

            if batch_size is None:
                image = tf.expand_dims(image, 0)
            else:
                [image, image_orig, label] = tf.train.batch(
                    [image, image_orig, label], batch_size=batch_size)

            if random_style_image_size:
                # Selects a random size for the style images and resizes all
                # the images in the batch to that size.
                image = _aspect_preserving_resize(
                    image,
                    tf.random_uniform(
                        [],
                        minval=min_rand_image_size,
                        maxval=max_rand_image_size,
                        dtype=tf.int32))

            return image, label, image_orig
def _load_model(self):
    """
    Define and instantiate the Lingvo ASR computation graph.

    Fetches the patched decoder, the vocabulary and the checkpoint files described in `self._LINGVO_CFG`
    through `self._check_and_download_file`, swaps in the patched `_ComputeMetrics`, points
    `Librispeech960Wpm` at the fetched vocabulary, instantiates the "Test" parameters of
    `asr.librispeech.Librispeech960Wpm` and restores every global variable whose name starts with
    "librispeech" into `self._sess`.

    :return: Tuple `(model, task, cluster)`.
    """
    import tensorflow.compat.v1 as tf1
    from lingvo import model_registry, model_imports
    from lingvo.core import cluster_factory
    from asr.librispeech import Librispeech960Wpm

    cfg = self._LINGVO_CFG
    fetch = self._check_and_download_file

    # Patched Lingvo ASR decoder: fetched for its side effect, the returned path is not needed.
    decoder_cfg = cfg["decoder"]
    fetch(decoder_cfg["uri"], decoder_cfg["basename"], cfg["path"], "asr")

    # Replace AsrDecoderBase._ComputeMetrics with the patched implementation (according to Qin et al.).
    import lingvo.tasks.asr.decoder as stock_decoder
    import asr.decoder_patched as patched_decoder

    stock_decoder.AsrDecoderBase._ComputeMetrics = patched_decoder.AsrDecoderBase._ComputeMetrics

    # Lingvo ASR vocabulary; the class attribute is overwritten so the model reads the fetched file.
    vocab_cfg = cfg["vocab"]
    vocab_path = fetch(vocab_cfg["uri"], vocab_cfg["basename"], cfg["path"], "asr")
    Librispeech960Wpm.WPM_SYMBOL_TABLE_FILEPATH = vocab_path

    # Register the model parameters and look up the "Test" configuration.
    registry_key = "asr.librispeech.Librispeech960Wpm"
    model_imports.ImportParams(registry_key)
    params = model_registry._ModelRegistryHelper.GetParams(registry_key, "Test")

    # Propagate the configured random seed, if any.
    if self.random_seed is not None:
        params.random_seed = self.random_seed

    # Instantiate the Lingvo ASR model under its cluster and device placer.
    cluster = cluster_factory.Cluster(params.cluster)
    with cluster, tf1.device(cluster.GetPlacer()):
        model = params.Instantiate()
        task = model.GetTask()

    # Pretrained checkpoint (Qin et al.): data file first, then the index file whose path is kept.
    data_cfg = cfg["model_data"]
    fetch(data_cfg["uri"], data_cfg["basename"], cfg["path"], "asr", "model")
    index_cfg = cfg["model_index"]
    model_index_path = fetch(index_cfg["uri"], index_cfg["basename"], cfg["path"], "asr", "model")

    # Initialise everything, then overwrite the "librispeech*" variables from the checkpoint.
    self._sess.run(tf1.global_variables_initializer())
    librispeech_vars = [v for v in tf1.global_variables() if v.name.startswith("librispeech")]
    restorer = tf1.train.Saver(librispeech_vars)
    # The checkpoint prefix is the index file path without its extension.
    checkpoint_prefix = os.path.splitext(model_index_path)[0]
    restorer.restore(self._sess, checkpoint_prefix)

    # set 'enable_asserts'-flag to False (Note: this flag ensures correct GPU support)
    tf1.flags.FLAGS.enable_asserts = False

    return model, task, cluster
def build_example(label, param_dict_real, zip_path_label):
    """Build the model with parameter values set in param_dict_real.

    Besides its arguments this function reads names that are not defined in
    its own body (`make_graph`, `make_test_inputs`, `use_frozen_graph`,
    `extra_toco_options`, `options`, `archive`, `zip_manifest`); they must be
    provided by the enclosing or module scope.

    Args:
        label: Label of the model.
        param_dict_real: Parameter dictionary (arguments to the factories
            make_graph and make_test_inputs).
        zip_path_label: Filename in the zip.

    Returns:
        (tflite_model_binary, report) where tflite_model_binary is the
        serialized flatbuffer as a string and report is a dictionary with keys
        `toco_log` (log of toco conversion), `tf_log` (log of tf conversion),
        `toco` (a string of success status of the conversion),
        `tf` (a string success status of the conversion). When building the
        graph or the test inputs fails with one of the handled exceptions,
        `(None, report)` is returned with the traceback in `tf_log`.
    """
    np.random.seed(RANDOM_SEED)
    report = {"toco": report_lib.NOTRUN, "tf": report_lib.FAILED}

    # Build graph
    report["tf_log"] = ""
    report["toco_log"] = ""
    tf.reset_default_graph()

    with tf.Graph().as_default():
        with tf.device("/cpu:0"):
            try:
                inputs, outputs = make_graph(param_dict_real)
            except (tf.errors.UnimplementedError,
                    tf.errors.InvalidArgumentError, ValueError):
                report["tf_log"] += traceback.format_exc()
                return None, report

        sess = tf.Session()
        # The session was previously never closed (neither on the early
        # return nor on success). It is only needed until the graph def has
        # been captured, so release it in a `finally`.
        try:
            try:
                baseline_inputs, baseline_outputs = make_test_inputs(
                    param_dict_real, sess, inputs, outputs)
            except (tf.errors.UnimplementedError,
                    tf.errors.InvalidArgumentError, ValueError):
                report["tf_log"] += traceback.format_exc()
                return None, report
            report["toco"] = report_lib.FAILED
            report["tf"] = report_lib.SUCCESS

            # Convert graph to toco
            input_tensors = [(input_tensor.name.split(":")[0],
                              input_tensor.shape, input_tensor.dtype)
                             for input_tensor in inputs]
            output_tensors = [
                _normalize_output_name(out.name) for out in outputs
            ]
            # pylint: disable=g-long-ternary
            graph_def = freeze_graph(
                sess,
                tf.global_variables() + inputs +
                outputs) if use_frozen_graph else sess.graph_def
        finally:
            sess.close()

    if "split_tflite_lstm_inputs" in param_dict_real:
        extra_toco_options.split_tflite_lstm_inputs = param_dict_real[
            "split_tflite_lstm_inputs"]

    tflite_model_binary, toco_log = options.tflite_convert_function(
        options,
        graph_def,
        input_tensors,
        output_tensors,
        extra_toco_options=extra_toco_options,
        test_params=param_dict_real)
    report["toco"] = (
        report_lib.SUCCESS
        if tflite_model_binary is not None else report_lib.FAILED)
    report["toco_log"] = toco_log

    if options.save_graphdefs:
        archive.writestr(zip_path_label + ".pbtxt",
                         text_format.MessageToString(graph_def),
                         zipfile.ZIP_DEFLATED)

    if tflite_model_binary:
        if options.make_edgetpu_tests:
            # Set proper min max values according to input dtype.
            baseline_inputs, baseline_outputs = generate_inputs_outputs(
                tflite_model_binary, min_value=0, max_value=255)
        archive.writestr(zip_path_label + ".bin", tflite_model_binary,
                         zipfile.ZIP_DEFLATED)
        example = {"inputs": baseline_inputs, "outputs": baseline_outputs}

        example_fp = StringIO()
        write_examples(example_fp, [example])
        archive.writestr(zip_path_label + ".inputs", example_fp.getvalue(),
                         zipfile.ZIP_DEFLATED)

        example_fp2 = StringIO()
        write_test_cases(example_fp2, zip_path_label + ".bin", [example])
        archive.writestr(zip_path_label + "_tests.txt",
                         example_fp2.getvalue(), zipfile.ZIP_DEFLATED)

        # The manifest line is "<zip path> <label>", or just the path when
        # both are identical.
        zip_manifest_label = zip_path_label + " " + label
        if zip_path_label == label:
            zip_manifest_label = zip_path_label

        zip_manifest.append(zip_manifest_label + "\n")

    return tflite_model_binary, report
def train_note_values_conv_net(test_data_arr, test_data_label, train_data_arr,
                               train_data_label):
    """
    This function trains the network for recognizing note values based on input data.
    Tutorial for this code found here:
    https://colab.research.google.com/github/tensorflow/docs/blob/master/site/en/tutorials/keras/classification.ipynb
    The weights are saved on disk (via a ModelCheckpoint callback) so that they can be used
    without retraining the network.

    The model built here is a Flatten layer followed by two Dense layers; samples whose label name is
    "Uncategorized" are excluded from training.

    :param test_data_arr: Array containing the test images.
    :param test_data_label: Labels with names and durations for the test data images.
    :param train_data_arr: Array containing the train images.
    :param train_data_label: Labels with names and durations for the train data images.
    """
    gpus = tf.config.experimental.list_physical_devices('GPU')
    # Guard against machines without a visible GPU: indexing an empty list
    # used to raise IndexError before any training started.
    if gpus:
        tf.config.experimental.set_memory_growth(gpus[0], True)
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'  # Alleged fix for some tensorflow bugs.

    construct_output(
        indent_level=0,
        message="Convolutional Network 1 (Note value determining).")

    # Scale these values to a range of 0 to 1 before feeding them to the network model.
    print("Scaling test values to [0-1] range.")
    test_data_arr = test_data_arr / 255.0
    print("Scaling train values to [0-1] range (this will take a while).")
    train_data_arr = train_data_arr / 255.0

    # Construct the path for saving the results of training:
    # <three directories above this file>/resources/saved_models/<checkpoint name>.
    saved_model_values_path = os.path.abspath(
        os.path.join(str(Path(__file__).parent.parent.parent), 'resources'))
    saved_model_values_path = os.path.join(saved_model_values_path,
                                           'saved_models')
    saved_model_name = "value_processing_net_saved.ckpt"
    saved_model_values_path = os.path.join(saved_model_values_path,
                                           saved_model_name)
    values_model_cb = tf.keras.callbacks.ModelCheckpoint(
        filepath=saved_model_values_path, save_weights_only=True, verbose=1)

    # First network only recognizes the values.
    # No need to feed it unrecognized elements (elements with no value).
    value_network_train_data_arr = np.array([
        x for i, x in enumerate(train_data_arr)
        if train_data_label[i][0][0] != "Uncategorized"
    ])
    value_network_train_data_label = np.array(
        [(x[0][0], x[1])
         for x in train_data_label
         if x[0][0] != "Uncategorized"])
    # The filtered test data is only consumed by the commented-out evaluation
    # code further down.
    value_network_test_data_arr = np.array([
        x for i, x in enumerate(test_data_arr)
        if test_data_label[i][0][0] != "Uncategorized"
    ])
    value_network_test_data_label = np.array(
        [(x[0][0], x[1])
         for x in test_data_label
         if x[0][0] != "Uncategorized"])

    # class_names contains possible results.
    # NOTE(review): 21 names are listed here while the output layer below has
    # 22 units - confirm whether the extra unit is intentional.
    class_names = [
        "A3", "A4", "A5",
        "B3", "B4", "B5",
        "C3", "C4", "C5",
        "D3", "D4", "D5",
        "E3", "E4", "E5",
        "F3", "F4", "F5",
        "G3", "G4", "G5"
    ]

    # Fetch only the labels (note values) from the data.
    value_network_train_data_label = [
        item[0] for item in value_network_train_data_label
    ]

    # Assign the corresponding numerical values to labels.
    value_network_train_data_label_values_numerical = values_to_numerical(
        value_network_train_data_label, class_names)

    # Specify using nvidia discrete GPU instead of Intel integrated graphics.
    # NOTE(review): the device index is hard-coded to 1 - confirm it exists on
    # the target machine.
    with tf.device('/GPU:1'):
        construct_output(indent_level=0, message="Start training.")

        # Set up the layers.
        # The first layer, tf.keras.layers.Flatten, transforms the format of the images
        # from a 2D array (200x200px) to a 1D array (of 200x200 = 40000 pixels).
        # After the pixels are flattened, the network consists of a sequence of two
        # tf.keras.layers.Dense (fully connected) layers.
        # The first Dense layer has 128 nodes; the second (and last) layer returns an
        # array of 22 scores, one per output class.
        model = tf.keras.Sequential([
            tf.keras.layers.Flatten(input_shape=(200, 200)),
            tf.keras.layers.Dense(128, activation='relu'),
            tf.keras.layers.Dense(22)
        ])

        # Before the model is ready for training, it needs a few more settings,
        # added during the model's compile step:
        # Loss function - measures how accurate the model is during training.
        # Optimizer - how the model is updated based on the data it sees and its loss function.
        # Metrics - used to monitor training; accuracy is the fraction of images
        # that are correctly classified.
        model.compile(
            optimizer='adam',
            loss=tf.keras.losses.SparseCategoricalCrossentropy(
                from_logits=True),
            metrics=['accuracy'])

        # Feed the training data to the model; the checkpoint callback writes
        # the weights to saved_model_values_path during fitting.
        model.fit(
            value_network_train_data_arr,
            value_network_train_data_label_values_numerical,
            epochs=3,
            callbacks=[values_model_cb])

        construct_output(
            indent_level=0,
            message="Save the network weights to avoid retraining on every run."
        )

        # Attach a softmax layer to convert the logits to probabilities, which are
        # easier to interpret. Only used by the commented-out test code below.
        probability_model = tf.keras.Sequential(
            [model, tf.keras.layers.Softmax()])

        # TESTING THE NETWORK. =======================================================================================
        # Compare how the model performs on the test dataset.
        # value_network_test_data_label = [item[0] for item in value_network_test_data_label]
        # value_network_test_data_label_values_numerical = values_to_numerical(
        #     value_network_test_data_label,
        #     class_names)
        # test_loss, test_acc = model.evaluate(value_network_test_data_arr,
        #                                      value_network_test_data_label_values_numerical,
        #                                      verbose=2
        #                                      )
        # print('\nTest accuracy:', test_acc)
        # predictions = probability_model.predict(value_network_test_data_arr)
        # print(predictions[0])
        # print("max= ", np.argmax(predictions[0]))
        # import cv2
        # cv2.imshow("img", value_network_test_data_arr[0])
        # cv2.waitKey()

        construct_output(indent_level=0, message="End training.")

    construct_output(
        indent_level=0,
        message="Convolutional Network 1 (Note value determining) Done.")
def _custom_parsing_context(self):
    """Return a `tf.device` context for the device named by `self.device_option`.

    The device type is "GPU" when `self.device_option.is_gpu()` is true and
    "CPU" otherwise; the device index is `self.device_option.num`.
    """
    option = self.device_option
    if option.is_gpu():
        kind = "GPU"
    else:
        kind = "CPU"
    spec = tf.DeviceSpec(device_type=kind, device_index=option.num)
    return tf.device(spec)
def main(unused_argv=None):
    """Build the MobileNet-based stylization graph and run slim training.

    All configuration is read from the module-level `FLAGS`. The content and
    style input pipelines are placed on CPU, the model and optimizer are built
    under the replica device setter, and training is started with
    `slim.learning.train` after the VGG-16 and MobileNet V2 variables have
    been restored from their checkpoints.

    Args:
        unused_argv: Ignored.
    """
    tf.logging.set_verbosity(tf.logging.INFO)
    with tf.Graph().as_default():
        # Keep every input op on CPU so that the GPU stays available for the
        # forward and backward passes.
        if FLAGS.ps_tasks:
            input_device = '/job:worker/cpu:0'
        else:
            input_device = '/cpu:0'
        input_placement = tf.train.replica_device_setter(
            FLAGS.ps_tasks, worker_device=input_device)
        with tf.device(input_placement):
            # Content images.
            content_inputs_, _ = image_utils.imagenet_inputs(
                FLAGS.batch_size, FLAGS.image_size)
            # Style images (only the possibly-augmented batch is used).
            style_inputs_, _, _ = image_utils.arbitrary_style_image_inputs(
                FLAGS.style_dataset_file,
                batch_size=FLAGS.batch_size,
                image_size=FLAGS.image_size,
                shuffle=True,
                center_crop=FLAGS.center_crop,
                augment_style_images=FLAGS.augment_style_images,
                random_style_image_size=FLAGS.random_style_image_size)

        with tf.device(tf.train.replica_device_setter(FLAGS.ps_tasks)):
            # The weight flags hold Python literals encoded as strings.
            content_weights = ast.literal_eval(FLAGS.content_weights)
            style_weights = ast.literal_eval(FLAGS.style_weights)

            # Model definition; the fourth return value is not needed here.
            model_outputs = build_mobilenet_model.build_mobilenet_model(
                content_inputs_,
                style_inputs_,
                mobilenet_trainable=False,
                style_params_trainable=True,
                transformer_trainable=True,
                mobilenet_end_point='layer_19',
                transformer_alpha=FLAGS.alpha,
                style_prediction_bottleneck=100,
                adds_losses=True,
                content_weights=content_weights,
                style_weights=style_weights,
                total_variation_weight=FLAGS.total_variation_weight,
            )
            stylized_images, total_loss, loss_dict, _ = model_outputs

            # One scalar summary per entry of the loss dictionary.
            for loss_name, loss_tensor in loss_dict.items():
                tf.summary.scalar(loss_name, loss_tensor)

            # Image summaries, at most three images each.
            image_summaries = (
                ('image/0_content_inputs', content_inputs_),
                ('image/1_style_inputs_aug', style_inputs_),
                ('image/2_stylized_images', stylized_images),
            )
            for tag, images in image_summaries:
                tf.summary.image(tag, images, 3)

            # Optimizer and train op.
            optimizer = tf.train.AdamOptimizer(FLAGS.learning_rate)
            train_op = slim.learning.create_train_op(
                total_loss,
                optimizer,
                clip_gradient_norm=FLAGS.clip_gradient_norm,
                summarize_gradients=False)

            # Restores the VGG16 parameters.
            restore_vgg = slim.assign_from_checkpoint_fn(
                vgg.checkpoint_file(), slim.get_variables('vgg_16'))

            # Restores the Mobilenet V2 parameters, keyed by op name.
            mobilenet_vars = {}
            for var in slim.get_model_variables('MobilenetV2'):
                mobilenet_vars[var.op.name] = var
            restore_mobilenet = slim.assign_from_checkpoint_fn(
                FLAGS.mobilenet_checkpoint, mobilenet_vars)

            def restore_pretrained(session):
                """Restore VGG16 first, then Mobilenet V2, into `session`."""
                restore_vgg(session)
                restore_mobilenet(session)

            # Run training.
            slim.learning.train(
                train_op=train_op,
                logdir=os.path.expanduser(FLAGS.train_dir),
                master=FLAGS.master,
                is_chief=(FLAGS.task == 0),
                number_of_steps=FLAGS.train_steps,
                init_fn=restore_pretrained,
                save_summaries_secs=FLAGS.save_summaries_secs,
                save_interval_secs=FLAGS.save_interval_secs)
def multi_hop_fact(qry_input_ids,
                   qry_input_mask,
                   qry_entity_ids,
                   entity_ids,
                   entity_mask,
                   ent2fact_ind,
                   ent2fact_val,
                   fact2ent_ind,
                   fact2ent_val,
                   fact2fact_ind,
                   fact2fact_val,
                   is_training,
                   use_one_hot_embeddings,
                   bert_config,
                   qa_config,
                   fact_mips_config,
                   num_hops,
                   exclude_set=None,
                   is_printing=True):
    """Multi-hops of propagation from input to output facts.

    Builds the initial fact set from the question entities, then calls
    `follow_fact` once per hop, feeding each hop's retrieved facts into the
    next one.

    Args:
        qry_input_ids: Query token ids; its first dimension is used as the
            batch size.
        qry_input_mask: Mask passed to the query encoders together with
            `qry_input_ids`.
        qry_entity_ids: Sparse tensor of question entities. Column 0 of its
            indices is used as the example index and its values as entity ids.
        entity_ids: (entity_word_ids) [num_entities, max_entity_len] Tensor
            holding word ids of each entity. Not used for now.
        entity_mask: (entity_word_masks) [num_entities, max_entity_len] Tensor
            with masks into word ids above. Not used for now.
        ent2fact_ind: Entity-to-fact indices passed to
            `model_utils.sparse_ragged_mul`.
        ent2fact_val: Entity-to-fact values passed to
            `model_utils.sparse_ragged_mul`.
        fact2ent_ind: Forwarded to `follow_fact`.
        fact2ent_val: Forwarded to `follow_fact`.
        fact2fact_ind: Forwarded to `follow_fact`.
        fact2fact_val: Forwarded to `follow_fact`.
        is_training: Forwarded to the encoders and to `follow_fact`.
        use_one_hot_embeddings: Forwarded to the shared query encoder.
        bert_config: BERT config; `vocab_size` sizes the word-weight variable.
        qa_config: QA config; `num_entities` and `projection_dim` are read
            here.
        fact_mips_config: MIPS config for facts; `ckpt_var_name`, `ckpt_path`,
            `num_neighbors` and `num_facts` are read here.
        num_hops: Number of hops (iterations of `follow_fact`).
        exclude_set: Not used for now.
        is_printing: If True, wraps the initial fact values in debugging
            `Print` ops and is forwarded to `follow_fact`.

    Returns:
        A 7-tuple:
        layer_entities: List with the entities returned by each hop.
        layer_facts: List with the facts returned by each hop.
        layer_dense: Empty list (never filled in this function).
        layer_sp: Empty list (never filled in this function).
        batch_qry_entities: Sparse [batch_size, num_entities] tensor of
            question entities with all values set to one.
        initial_facts: Sparse facts derived from the question entities, before
            scaling and thresholding.
        qry_seq_emb: Output of the shared query encoder.
    """
    del entity_ids, entity_mask, exclude_set  # Not used for now.

    # Fact feature database plus its MIPS search function, kept on CPU.
    with tf.device("/cpu:0"):
        tf_fact_db, fact_mips_search_fn = search_utils.create_mips_searcher(
            fact_mips_config.ckpt_var_name,
            # [fact_mips_config.num_facts, fact_mips_config.emb_size],
            fact_mips_config.ckpt_path,
            fact_mips_config.num_neighbors,
            local_var_name="scam_init_barrier_fact")

    # Question bag-of-words embedding: one trainable weight per BERT vocab
    # entry, shared by all query embeddings.
    with tf.variable_scope("qry/bow"):
        word_weights = tf.get_variable(
            "word_weights", [bert_config.vocab_size, 1],
            dtype=tf.float32,
            initializer=tf.ones_initializer())

    qry_seq_emb, word_emb_table = model_utils.shared_qry_encoder_v2(
        qry_input_ids, qry_input_mask, is_training, use_one_hot_embeddings,
        bert_config, qa_config)

    del word_weights, word_emb_table  # Not used for now.

    batch_size = tf.shape(qry_input_ids)[0]

    # Question entities without scores: (example index, entity id) -> 1.0.
    example_column = qry_entity_ids.indices[:, 0:1]
    entity_column = tf.cast(tf.expand_dims(qry_entity_ids.values, 1), tf.int64)
    entity_coords = tf.concat([example_column, entity_column], axis=1)
    batch_qry_entities = tf.SparseTensor(
        indices=entity_coords,
        values=tf.ones_like(qry_entity_ids.values, dtype=tf.float32),
        dense_shape=[batch_size, qa_config.num_entities])

    # Initial facts: entities -> facts, aggregated with "sum" (max or sum).
    initial_facts = model_utils.sparse_ragged_mul(
        batch_qry_entities,
        ent2fact_ind,
        ent2fact_val,
        batch_size,
        fact_mips_config.num_facts,
        "sum",
        threshold=None,
        fix_values_to_one=True)

    # Note: set a hyper parameter in qa.config
    # Note: can we do top k here for sparse tensor?
    # Limit the number of init facts such that we won't have too many facts:
    # keep only entries whose scaled score exceeds 0.25.
    scaled_initial_facts = maxscale_spare_tensor(initial_facts)
    keep_mask = tf.greater(scaled_initial_facts.values, 0.25)
    final_initial_facts = tf.sparse.retain(scaled_initial_facts, keep_mask)

    if is_printing:
        # Chain three Print ops through the values so that each stage is
        # logged whenever the final values are evaluated.
        debug_stages = (
            (initial_facts,
             "-" * 100 + "\n\n ## Initial Facts (at hop 0):\n"
             "shape(initial_facts), initial_facts.values,"),
            (scaled_initial_facts,
             "shape(scaled_initial_facts), scaled_initial_facts.values,"),
            (final_initial_facts,
             "shape(final_initial_facts), final_initial_facts.values,"),
        )
        traced_vals = final_initial_facts.values
        for stage, banner in debug_stages:
            traced_vals = tf.compat.v1.Print(
                input_=traced_vals,
                data=[
                    tf.shape(stage.indices),
                    stage.values,
                ],
                message=banner,
                first_n=10,
                summarize=52)
        final_initial_facts = tf.SparseTensor(final_initial_facts.indices,
                                              traced_vals,
                                              final_initial_facts.dense_shape)

    layer_facts, layer_entities = [], []
    layer_dense, layer_sp = [], []
    batch_facts = final_initial_facts
    for hop in range(num_hops):
        with tf.name_scope("hop_%d" % hop):
            # Question start/end embeddings specific to this hop.
            qry_start_emb, qry_end_emb = model_utils.layer_qry_encoder(
                qry_seq_emb,
                qry_input_ids,
                qry_input_mask,
                is_training,
                bert_config,
                qa_config,
                suffix="_%d" % hop,
                project_dim=qa_config.projection_dim)  # project=True

            hop_entities, hop_facts, _, _ = follow_fact(
                batch_facts, qry_start_emb, qry_end_emb, fact2fact_ind,
                fact2fact_val, fact2ent_ind, fact2ent_val,
                fact_mips_search_fn, tf_fact_db, fact_mips_config, qa_config,
                is_training, hop, is_printing)

            # The retrieved facts seed the next hop.
            batch_facts = hop_facts

            # Collect the per-hop results.
            layer_facts.append(hop_facts)
            layer_entities.append(hop_entities)

    tf.logging.info("len layer_facts: %d", len(layer_facts))
    tf.logging.info("len layer_entities: %d", len(layer_entities))
    return (layer_entities, layer_facts, layer_dense, layer_sp,
            batch_qry_entities, initial_facts, qry_seq_emb)
def multi_hop_mention(qry_input_ids,
                      qry_input_mask,
                      qry_entity_ids,
                      entity_ids,
                      entity_mask,
                      ent2ment_ind,
                      ent2ment_val,
                      ment2ent_map,
                      is_training,
                      use_one_hot_embeddings,
                      bert_config,
                      qa_config,
                      mips_config,
                      num_hops,
                      exclude_set=None,
                      bridge_mentions=None,
                      answer_mentions=None):
    """Multi-hops of propagation from input to output entities.

    Scores the question entities against the query (entity linking), then
    calls `follow_mention` once per hop; after each hop the returned entity
    scores are temperature-scaled and softmaxed to seed the next hop.

    Args:
        qry_input_ids: Query token ids; its first dimension is used as the
            batch size.
        qry_input_mask: Mask passed to the query encoders together with
            `qry_input_ids`.
        qry_entity_ids: Sparse tensor of question entities. Column 0 of its
            indices is used as the example index and its values as entity ids.
        entity_ids: (entity_word_ids) [num_entities, max_entity_len] Tensor
            holding word ids of each entity.
        entity_mask: (entity_word_masks) [num_entities, max_entity_len] Tensor
            with masks into word ids above.
        ent2ment_ind: Forwarded to `follow_mention`.
        ent2ment_val: Forwarded to `follow_mention`.
        ment2ent_map: Forwarded to `follow_mention`.
        is_training: Forwarded to the encoders and to `follow_mention`.
        use_one_hot_embeddings: Forwarded to the shared query encoder.
        bert_config: BERT config; `vocab_size` and `hidden_size` are read
            here.
        qa_config: QA config; `num_entities` and `softmax_temperature` are
            read here.
        mips_config: MIPS config for mentions; `ckpt_var_name`, `ckpt_path`
            and `num_neighbors` are read here.
        num_hops: Number of hops (iterations of `follow_mention`).
        exclude_set: Optional sparse set of (example, entity) pairs removed
            from each hop's entities.
        bridge_mentions: Passed to `follow_mention` as the mentions to ensure
            on the first hop.
        answer_mentions: Passed to `follow_mention` as the mentions to ensure
            on every hop after the first.

    Returns:
        layer_entities: List with each hop's entities (reordered, with the
            exclude set removed, but not softmaxed).
        layer_mentions: List with each hop's mentions.
        layer_dense: List with each hop's dense mention vector.
        layer_sp: List with each hop's sparse mention vector.
        batch_entities_nosc: Sparse [batch_size, num_entities] tensor of
            question entities with their raw entity-linking scores.
        qry_seq_emb: Output of the shared query encoder.
    """
    # Question bag-of-words embedding: one trainable weight per BERT vocab
    # entry.
    with tf.variable_scope("qry/bow"):
        word_weights = tf.get_variable(
            "word_weights", [bert_config.vocab_size, 1],
            dtype=tf.float32,
            initializer=tf.ones_initializer())

    # Note: we can use the [CLS] token here?
    qry_seq_emb, word_emb_table = model_utils.shared_qry_encoder_v2(
        qry_input_ids, qry_input_mask, is_training, use_one_hot_embeddings,
        bert_config, qa_config)

    batch_size = tf.shape(qry_input_ids)[0]

    # Multiple entities per question. We need to re-score.
    with tf.name_scope("entity_linking"):
        # Embeddings of the question entities.
        batch_entity_emb = model_utils.entity_emb(
            tf.cast(qry_entity_ids.values, tf.int64), entity_ids, entity_mask,
            word_emb_table, word_weights)

        # Un-projected question embedding used for entity linking.
        qry_el_emb, _ = model_utils.layer_qry_encoder(
            qry_seq_emb,
            qry_input_ids,
            qry_input_mask,
            is_training,
            bert_config,
            qa_config,
            suffix="_el",
            project=False)

        # Score of each entity: dot product with its own question's embedding.
        example_of_entity = qry_entity_ids.indices[:, 0]
        batch_qry_el_emb = tf.gather(qry_el_emb, example_of_entity)
        batch_entity_el_scs = tf.reduce_sum(
            batch_qry_el_emb * batch_entity_emb, -1)

        # Note: double check this.
        example_column = qry_entity_ids.indices[:, 0:1]
        entity_column = tf.cast(
            tf.expand_dims(qry_entity_ids.values, 1), tf.int64)
        batch_entities_nosc = tf.SparseTensor(
            indices=tf.concat([example_column, entity_column], axis=1),
            values=batch_entity_el_scs,
            dense_shape=[batch_size, qa_config.num_entities])
        batch_entities = tf.sparse.softmax(
            tf.sparse.reorder(batch_entities_nosc))

    ensure_mentions = bridge_mentions  # Note: check "supporting facts"

    # Mention feature database plus its MIPS search function, kept on CPU.
    with tf.device("/cpu:0"):
        tf_db, mips_search_fn = search_utils.create_mips_searcher(
            mips_config.ckpt_var_name,
            # [mips_config.num_mentions, mips_config.emb_size],
            mips_config.ckpt_path,
            mips_config.num_neighbors,
            local_var_name="scam_init_barrier")

    layer_mentions, layer_entities = [], []
    layer_dense, layer_sp = [], []
    for hop in range(num_hops):
        with tf.name_scope("hop_%d" % hop):
            # Note: the question start/end embeddings for each hop?
            qry_start_emb, qry_end_emb = model_utils.layer_qry_encoder(
                qry_seq_emb,
                qry_input_ids,
                qry_input_mask,
                is_training,
                bert_config,
                qa_config,
                suffix="_%d" % hop)  # project=True

            hop_result = follow_mention(
                batch_entities, qry_start_emb, qry_end_emb, entity_ids,
                entity_mask, ent2ment_ind, ent2ment_val, ment2ent_map,
                word_emb_table, word_weights, mips_search_fn, tf_db,
                bert_config.hidden_size, mips_config, qa_config, is_training,
                ensure_mentions)
            (ret_entities, ret_mentions, dense_mention_vec,
             sp_mention_vec) = hop_result

            # Note: check this. Shouldn't for wrong choices.
            if exclude_set:
                # Drop the (example, entity) pairs listed in the exclude set.
                excluded_examples = tf.cast(exclude_set.indices[:, 0:1],
                                            tf.int64)
                excluded_entities = tf.cast(
                    tf.expand_dims(exclude_set.values, 1), tf.int64)
                exclude_indices = tf.concat(
                    [excluded_examples, excluded_entities], axis=1)
                ret_entities = model_utils.remove_from_sparse(
                    ret_entities, exclude_indices)
            ret_entities = tf.sparse.reorder(ret_entities)

            # Temperature-scaled softmax gives the entities for the next hop.
            scaled_entities = tf.SparseTensor(
                indices=ret_entities.indices,
                values=ret_entities.values / qa_config.softmax_temperature,
                dense_shape=ret_entities.dense_shape)
            batch_entities = tf.sparse.softmax(scaled_entities)

            ### Start of debugging w/ tf.Print ###
            # Chain the Print ops through the values of batch_entities so
            # they fire whenever those values are evaluated.
            debug_items = (
                (ret_entities.indices,
                 "ret_entities.indices at hop %d \n" % hop, 50),
                (ret_entities.values,
                 "ret_entities.values at hop %d \n" % hop, 25),
                (batch_entities.indices,
                 "scaled_entities.indices at hop %d \n" % hop, 50),
                (batch_entities.values,
                 "scaled_entities.values at hop %d \n" % hop, 25),
            )
            traced_vals = batch_entities.values
            for shown, banner, limit in debug_items:
                traced_vals = tf.compat.v1.Print(
                    input_=traced_vals,
                    data=[
                        shown,
                    ],
                    message=banner,
                    first_n=10,
                    summarize=limit)
            batch_entities = tf.SparseTensor(
                indices=batch_entities.indices,
                values=traced_vals,
                dense_shape=batch_entities.dense_shape)
            ### End of debugging w/ tf.Print ###

            # Later hops ensure the answer mentions instead of the bridge
            # mentions. Note: seems not helpful now?
            ensure_mentions = answer_mentions

            layer_mentions.append(ret_mentions)
            layer_entities.append(ret_entities)  # Not softmaxed.
            layer_dense.append(dense_mention_vec)
            layer_sp.append(sp_mention_vec)

    return (layer_entities, layer_mentions, layer_dense, layer_sp,
            batch_entities_nosc, qry_seq_emb)
def follow_fact(
    batch_facts,
    relation_st_qry,
    relation_en_qry,
    fact2fact_ind,
    fact2fact_val,
    fact2ent_ind,
    fact2ent_val,
    fact_mips_search_fn,
    tf_fact_db,
    fact_mips_config,
    qa_config,
    is_training,
    hop_id=0,
    is_printing=True,
):
    """Run one fact-to-fact hop and map the retrieved facts onto entities.

    The hop has a sparse side (expanding `batch_facts` through the
    `fact2fact_*` ragged maps; skipped on the first hop) and a dense side (a
    MIPS lookup whose query is the per-example mean embedding of the incoming
    facts plus the concatenated relation vectors). Depending on `qa_config`
    the two are combined, the resulting scores are divided by their maximum,
    and the scaled facts are pushed through the `fact2ent_*` ragged maps.

    Args:
      batch_facts: [batch_size, num_facts] SparseTensor of incoming facts and
        their scores.
      relation_st_qry: [batch_size, dim] Tensor of start query vectors for
        dense retrieval.
      relation_en_qry: [batch_size, dim] Tensor of end query vectors for dense
        retrieval.
      fact2fact_ind: [num_facts, num_facts] RaggedTensor mapping each fact to
        the indices of the facts it leads to.
      fact2fact_val: [num_facts, num_facts] RaggedTensor holding the scores
        that go with `fact2fact_ind`.
      fact2ent_ind: [num_facts, num_entities] RaggedTensor mapping facts to
        the indices of the entities which co-occur with them.
      fact2ent_val: [num_facts, num_entities] RaggedTensor holding the scores
        that go with `fact2ent_ind`.
      fact_mips_search_fn: Function which accepts a dense query and returns a
        pair whose second element is the ids of the retrieved facts.
      tf_fact_db: [num_facts, 2 * dim] Tensor of fact representations.
      fact_mips_config: MIPS config object; `num_facts` and `num_neighbors`
        are read from it.
      qa_config: QAConfig object.
      is_training: Boolean; selects `train_with_sparse` or
        `predict_with_sparse` when deciding whether to use the sparse side.
      hop_id: int, the current hop id. The sparse expansion only runs for
        `hop_id > 0`.
      is_printing: if true, tensors are routed through `tf.compat.v1.Print`
        ops (first 10 evaluations only) for debugging.

    Returns:
      A 4-tuple `(ret_entities, ret_facts, None, None)`:
        ret_entities: SparseTensor of entity scores obtained from the scaled
          (unfiltered) facts, divided by the largest entity score.
        ret_facts: SparseTensor of the scaled facts whose score exceeds 0.5.
        The last two slots are always `None`; they keep the tuple the same
        length as the one returned by `follow_mention`.

    Raises:
      ValueError: if the sparse side is enabled and
        `qa_config.sparse_strategy` is neither "dense_first" nor
        "sparse_first".
    """

    def _echo(passthrough, payload, label, limit):
        """Returns `passthrough` wrapped in a Print op that logs `payload`."""
        return tf.compat.v1.Print(
            input_=passthrough,
            data=payload,
            message=label,
            first_n=10,
            summarize=limit)

    total_facts = fact_mips_config.num_facts
    n_examples = batch_facts.dense_shape[0]
    # Row (example) and column (fact) ids are taken BEFORE thresholding, so
    # the dense aggregation below still sees every incoming fact.
    row_ids = batch_facts.indices[:, 0]
    col_ids = batch_facts.indices[:, 1]
    # present_rows: the distinct example ids that own at least one fact.
    # row_to_slot: for every fact, the position of its example in present_rows.
    present_rows, row_to_slot = tf.unique(row_ids)

    score_floor = qa_config.fact_score_threshold
    if score_floor is not None:
        # Drop facts that do not score above the configured threshold.
        batch_facts = tf.sparse.retain(
            batch_facts, tf.greater(batch_facts.values, score_floor))

    # Sparse side: (batch x num_facts) times (num_facts x num_facts).
    if hop_id <= 0:
        # First hop: sparse retrieval was already done from the question, so
        # the incoming facts are used as they are.
        sparse_hits = batch_facts
    else:
        expanded = model_utils.sparse_ragged_mul(
            batch_facts,
            fact2fact_ind,
            fact2fact_val,
            n_examples,
            total_facts,
            "sum",
            threshold=None,
            fix_values_to_one=True)
        # NOTE(review): the cut-off 3 is hard-coded; the original comment next
        # to it read "1/0.2 = 5" -- confirm the intended value.
        sparse_hits = tf.sparse.retain(expanded,
                                       tf.greater(expanded.values, 3))

    # Dense side: one embedding per incoming fact. The fact scores are not
    # used to weight these embeddings.
    member_embs = tf.gather(tf_fact_db, col_ids)

    if is_printing:
        member_embs = _echo(
            member_embs,
            [tf.shape(batch_facts.indices)[0], batch_facts.indices],
            "\n\n###\n batch_facts.indices and total #facts at hop %d \n" %
            hop_id,
            50)
        member_embs = _echo(
            member_embs,
            [batch_facts.values],
            "batch_facts.values at hop %d \n" % hop_id,
            25)
        member_embs = _echo(
            member_embs,
            [tf.shape(sparse_hits.indices)[0], sparse_hits.indices],
            "\n Sparse Fact Results @ hop %d \n" % hop_id +
            " sp_fact_vec.indices at hop %d \n" % hop_id,
            50)
        member_embs = _echo(
            member_embs,
            [sparse_hits.values],
            "sp_fact_vec.values at hop %d \n" % hop_id,
            25)

    # Average the fact embeddings of each example and scatter the result back
    # to the example's original row; examples without facts stay all-zero.
    emb_width = 2 * qa_config.projection_dim
    pooled = tf.math.unsorted_segment_mean(member_embs, row_to_slot,
                                           tf.shape(present_rows)[0])
    batch_fact_emb = tf.scatter_nd(
        tf.expand_dims(present_rows, 1), pooled,
        tf.stack([n_examples, emb_width], axis=0))
    batch_fact_emb.set_shape([None, emb_width])

    # Dense query: aggregated fact embedding plus [start; end] relation query.
    scam_qrys = batch_fact_emb + tf.concat(
        [relation_st_qry, relation_en_qry], axis=1)
    with tf.device("/cpu:0"):
        # Ids of the retrieved neighbours and their stored representations.
        _, neighbor_ids = fact_mips_search_fn(scam_qrys)
        neighbor_embs = tf.gather(tf_fact_db, neighbor_ids)
    if qa_config.l2_normalize_db:
        neighbor_embs = tf.nn.l2_normalize(neighbor_embs, axis=2)

    # A neighbour's score is its inner product with the query.
    neighbor_scores = tf.squeeze(
        tf.matmul(
            tf.expand_dims(scam_qrys, 1), neighbor_embs, transpose_b=True),
        1)
    dense_fact_vec = model_utils.convert_search_to_vector(
        neighbor_scores, neighbor_ids, tf.cast(n_examples, tf.int32),
        fact_mips_config.num_neighbors, fact_mips_config.num_facts)

    # Combine sparse and dense search.
    if is_training:
        use_sparse = qa_config.train_with_sparse
    else:
        use_sparse = qa_config.predict_with_sparse
    if not use_sparse:
        ranked_facts = dense_fact_vec
    elif qa_config.sparse_strategy == "dense_first":
        ranked_facts = model_utils.sp_sp_matmul(dense_fact_vec, sparse_hits)
    elif qa_config.sparse_strategy == "sparse_first":
        with tf.device("/cpu:0"):
            ranked_facts = model_utils.rescore_sparse(sparse_hits, tf_fact_db,
                                                      scam_qrys)
    else:
        raise ValueError("Unrecognized sparse_strategy %s" %
                         qa_config.sparse_strategy)

    # Scale the scores by the largest score in the whole batch (no softmax).
    ranked_facts = tf.sparse.reorder(ranked_facts)
    scaled_facts = tf.SparseTensor(
        indices=ranked_facts.indices,
        values=ranked_facts.values / tf.reduce_max(ranked_facts.values),
        dense_shape=ranked_facts.dense_shape)

    # Keep only the facts scoring above half of the maximum.
    # NOTE(review): the original comment said "max/5" while the code uses 0.5.
    kept_facts = tf.sparse.retain(scaled_facts,
                                  tf.greater(scaled_facts.values, 0.5))

    # Entities are retrieved from the scaled but NOT filtered facts.
    entity_hits = model_utils.sparse_ragged_mul(
        scaled_facts,
        fact2ent_ind,
        fact2ent_val,
        n_examples,
        qa_config.num_entities,
        "sum",
        threshold=qa_config.fact_score_threshold,
        fix_values_to_one=True)
    ret_entities = tf.SparseTensor(
        indices=entity_hits.indices,
        values=entity_hits.values / tf.reduce_max(entity_hits.values),
        dense_shape=entity_hits.dense_shape)

    if is_printing:
        traced = ret_entities.values
        traced = _echo(
            traced,
            [tf.shape(ranked_facts.indices)[0], ranked_facts.indices],
            "\n\n-rescored- ret_fact_vec.indices at hop %d \n" % hop_id,
            51)
        traced = _echo(
            traced,
            [ranked_facts.values],
            "-rescored- ret_fact_vec.values at hop %d \n" % hop_id,
            25)
        traced = _echo(
            traced,
            [scaled_facts.values],
            "ret_fact_vec_sf.values at hop %d \n" % hop_id,
            25)
        traced = _echo(
            traced,
            [tf.shape(kept_facts.values), kept_facts.values],
            "ret_fact_vec_sf_fitered.values at hop %d \n" % hop_id,
            25)
        # Re-wrap so the Print ops sit on the path of the returned values.
        ret_entities = tf.SparseTensor(
            indices=ret_entities.indices,
            values=traced,
            dense_shape=ret_entities.dense_shape)

    return ret_entities, kept_facts, None, None
def follow_mention(batch_entities,
                   relation_st_qry,
                   relation_en_qry,
                   entity_word_ids,
                   entity_word_masks,
                   ent2ment_ind,
                   ent2ment_val,
                   ment2ent_map,
                   word_emb_table,
                   word_weights,
                   mips_search_fn,
                   tf_db,
                   hidden_size,
                   mips_config,
                   qa_config,
                   is_training,
                   ensure_index=None):
    """Follow one hop from a sparse set of entities to mentions and entities.

    A sparse lookup (entities -> co-occurring mentions) and a dense MIPS
    lookup (query built from the score-weighted bag-of-words entity embedding
    plus the relation vectors) are computed, optionally combined, and the
    resulting mentions are mapped back to entities through `ment2ent_map`.

    Args:
      batch_entities: [batch_size, num_entities] SparseTensor of incoming
        entities and their scores.
      relation_st_qry: [batch_size, dim] Tensor of start query vectors for
        dense retrieval.
      relation_en_qry: [batch_size, dim] Tensor of end query vectors for dense
        retrieval.
      entity_word_ids: [num_entities, max_entity_len] Tensor holding word ids
        of each entity.
      entity_word_masks: [num_entities, max_entity_len] Tensor with masks into
        the word ids above.
      ent2ment_ind: [num_entities, num_mentions] RaggedTensor mapping entities
        to the indices of mentions which co-occur with them.
      ent2ment_val: [num_entities, num_mentions] RaggedTensor mapping entities
        to the scores of mentions which co-occur with them.
      ment2ent_map: [num_mentions] Tensor mapping mentions to their entities.
      word_emb_table: [vocab_size, dim] Tensor of word embeddings.
      word_weights: [vocab_size, 1] Tensor of word weights.
      mips_search_fn: Function which accepts a dense query and returns a pair
        whose second element is the ids of the retrieved mentions.
      tf_db: [num_mentions, 2 * dim] Tensor of mention representations.
      hidden_size: Scalar dimension of word embeddings.
      mips_config: MIPSConfig object; `num_mentions` and `num_neighbors` are
        read from it.
      qa_config: QAConfig object.
      is_training: Boolean.
      ensure_index: [batch_size] Tensor of mention ids. Only read when
        `is_training` is true and `qa_config.ensure_answer_sparse` or
        `qa_config.ensure_answer_dense` is set.

    Returns:
      A 4-tuple `(ret_entity_vec, ret_mention_vec, dense_mention_vec,
      sp_mention_vec)`:
        ret_entity_vec: SparseTensor with dense shape
          [batch_size, num_entities]; the values of `ret_mention_vec` with
          each mention id replaced by its entity id.
        ret_mention_vec: sparse mention scores after combining the sparse and
          dense searches (or the dense result alone).
        dense_mention_vec: sparse vector built from the MIPS neighbours.
        sp_mention_vec: result of the ragged sparse search.

    Raises:
      ValueError: if the sparse side is enabled and
        `qa_config.sparse_strategy` is neither "dense_first" nor
        "sparse_first".
    """
    score_floor = qa_config.entity_score_threshold
    if score_floor is not None:
        # Drop entities that do not score above the configured threshold.
        batch_entities = tf.sparse.retain(
            batch_entities, tf.greater(batch_entities.values, score_floor))

    n_examples = batch_entities.dense_shape[0]
    row_ids = batch_entities.indices[:, 0]  # example id of each entry
    entity_ids = batch_entities.indices[:, 1]  # entity id of each entry
    entity_weights = batch_entities.values  # score of each entry

    # Bag-of-words embedding per (example, entity) entry, weighted by score.
    weighted_embs = model_utils.entity_emb(
        entity_ids, entity_word_ids, entity_word_masks, word_emb_table,
        word_weights) * tf.expand_dims(entity_weights, axis=1)

    # Sum the entries of each example and scatter the sums back to the
    # example's original row; examples without entities stay all-zero.
    present_rows, row_to_slot = tf.unique(row_ids)
    pooled = tf.unsorted_segment_sum(weighted_embs, row_to_slot,
                                     tf.shape(present_rows)[0])
    batch_bow_emb = tf.scatter_nd(
        tf.expand_dims(present_rows, 1), pooled,
        tf.stack([n_examples, hidden_size], axis=0))
    batch_bow_emb.set_shape([None, hidden_size])
    if qa_config.projection_dim is not None:
        with tf.variable_scope("projection"):
            batch_bow_emb = contrib_layers.fully_connected(
                batch_bow_emb,
                qa_config.projection_dim,
                activation_fn=tf.nn.tanh,
                reuse=tf.AUTO_REUSE,
                scope="bow_projection")

    # Ragged sparse search:
    # (batch x num_entities) times (num_entities x num_mentions).
    sp_mention_vec = model_utils.sparse_ragged_mul(
        batch_entities,
        ent2ment_ind,
        ent2ment_val,
        n_examples,
        mips_config.num_mentions,
        qa_config.sparse_reduce_fn,
        threshold=score_floor,
        fix_values_to_one=qa_config.fix_sparse_to_one)
    if is_training and qa_config.ensure_answer_sparse:
        # Force one given mention per example into the sparse result, then
        # cap every value at 1.
        forced_pairs = tf.stack([tf.range(n_examples), ensure_index], axis=-1)
        forced_vec = tf.SparseTensor(
            tf.cast(forced_pairs, tf.int64),
            tf.ones([n_examples]),
            dense_shape=[n_examples, mips_config.num_mentions])
        merged = tf.sparse.add(sp_mention_vec, forced_vec)
        sp_mention_vec = tf.SparseTensor(
            indices=merged.indices,
            values=tf.minimum(1., merged.values),
            dense_shape=merged.dense_shape)

    # Dense query: [subject + start relation; subject + end relation].
    start_half = batch_bow_emb + relation_st_qry
    end_half = batch_bow_emb + relation_en_qry
    scam_qrys = tf.concat([start_half, end_half], axis=1)
    with tf.device("/cpu:0"):
        _, neighbor_ids = mips_search_fn(scam_qrys)
        if is_training and qa_config.ensure_answer_dense:
            neighbor_ids = model_utils.ensure_values_in_mat(
                neighbor_ids, ensure_index, tf.int32)
        neighbor_embs = tf.gather(tf_db, neighbor_ids)
    if qa_config.l2_normalize_db:
        neighbor_embs = tf.nn.l2_normalize(neighbor_embs, axis=2)

    # A neighbour's score is its inner product with the query.
    neighbor_scores = tf.squeeze(
        tf.matmul(
            tf.expand_dims(scam_qrys, 1), neighbor_embs, transpose_b=True),
        1)
    dense_mention_vec = model_utils.convert_search_to_vector(
        neighbor_scores, neighbor_ids, tf.cast(n_examples, tf.int32),
        mips_config.num_neighbors, mips_config.num_mentions)

    # Combine sparse and dense search.
    if is_training:
        use_sparse = qa_config.train_with_sparse
    else:
        use_sparse = qa_config.predict_with_sparse
    if not use_sparse:
        ret_mention_vec = dense_mention_vec
    elif qa_config.sparse_strategy == "dense_first":
        ret_mention_vec = model_utils.sp_sp_matmul(dense_mention_vec,
                                                   sp_mention_vec)
    elif qa_config.sparse_strategy == "sparse_first":
        with tf.device("/cpu:0"):
            ret_mention_vec = model_utils.rescore_sparse(
                sp_mention_vec, tf_db, scam_qrys)
    else:
        raise ValueError("Unrecognized sparse_strategy %s" %
                         qa_config.sparse_strategy)

    # Re-index the mention scores by the entity each mention belongs to.
    mapped_entities = tf.cast(
        tf.gather(ment2ent_map, ret_mention_vec.indices[:, 1]), tf.int64)
    entity_coords = tf.concat(
        [ret_mention_vec.indices[:, 0:1],
         tf.expand_dims(mapped_entities, 1)],
        axis=1)
    ret_entity_vec = tf.SparseTensor(
        indices=entity_coords,
        values=ret_mention_vec.values,
        dense_shape=[n_examples, qa_config.num_entities])

    return ret_entity_vec, ret_mention_vec, dense_mention_vec, sp_mention_vec
def input_producer(raw_data, batch_size, num_steps, shuffle=False,
                   randomize=False, random_len=False):
    """Produces graph-based input for Penn Treebank.

    The corpus is truncated to a multiple of `batch_size`, reshaped to
    `[batch_size, batch_len]` and sliced along the second axis. The targets
    `y` are always the inputs `x` shifted one position to the right.

    Args:
      raw_data: np tensor of size [num_words].
      batch_size: number of rows the corpus is split into.
      num_steps: number of BPTT steps.
      shuffle: whether the sequential producer visits its windows in shuffled
        order (forwarded to `tf.train.range_input_producer`). Only used when
        neither `randomize` nor `random_len` is set.
      randomize: use a window at a random offset instead of walking the
        continuous corpus.
      random_len: draw a random window length on every step. Takes precedence
        over `randomize`.

    Returns:
      If `random_len` is set, the tuple
      `(x, y, num_batches_per_epoch, reset_start_idx, should_reset,
      base_bptt)` where `reset_start_idx` is an op that assigns 0 to the
      internal start index, `should_reset` is a boolean tensor that is true
      once the window end is within 3 positions of `batch_len`, and
      `base_bptt` is the mean used for the length draw.
      Otherwise the tuple `(x, y, num_batches_per_epoch)` with `x` and `y` of
      static shape [batch_size, num_steps].
    """
    num_batches_per_epoch = (
        (np.size(raw_data) // batch_size) - 1) // num_steps
    raw_data = tf.convert_to_tensor(raw_data, name='raw_data', dtype=tf.int32)

    data_len = tf.size(raw_data)
    batch_len = data_len // batch_size
    data = tf.reshape(raw_data[0:batch_size * batch_len],
                      [batch_size, batch_len])

    epoch_size = (batch_len - 1) // num_steps
    with tf.device('/cpu:0'):
        epoch_size = tf.identity(epoch_size, name='epoch_size')

        if random_len:
            start_idx = tf.Variable(0, name='start_idx', dtype=tf.int32,
                                    trainable=False)
            # With probability 0.95 aim for num_steps, otherwise for half.
            base_bptt = tf.cond(
                tf.random_uniform(shape=(), minval=0., maxval=1.) < 0.95,
                lambda: tf.cast(num_steps, dtype=tf.float32),
                lambda: tf.cast(num_steps, dtype=tf.float32) / 2.)
            seq_len = tf.random.truncated_normal(
                shape=(), mean=base_bptt, stddev=5., dtype=tf.float32)
            seq_len = tf.cast(seq_len, dtype=tf.int32)
            seq_len = tf.minimum(seq_len, num_steps + 20)  # <= num_steps + 20
            # Leave room for the one-step shift of the targets.
            seq_len = tf.minimum(seq_len, batch_len - start_idx - 1)
            end_idx = start_idx + seq_len

            x = data[:, start_idx:end_idx]
            y = data[:, start_idx + 1:end_idx + 1]

            # should_reset is created under control dependencies on x, y and
            # on the assignment that moves start_idx to end_idx.
            with tf.control_dependencies([x, y]):
                with tf.control_dependencies(
                        [tf.assign(start_idx, end_idx)]):
                    should_reset = tf.greater_equal(end_idx, batch_len - 3)

            reset_start_idx = tf.assign(start_idx, 0)
            return (x, y, num_batches_per_epoch, reset_start_idx,
                    should_reset, base_bptt)

        if randomize:
            # The offset must be a scalar: `[0, i]` is packed into the 1-D
            # begin/end vectors of strided_slice, and packing a scalar
            # together with a shape-[1] tensor fails at graph construction.
            i = tf.random_uniform([], minval=0, maxval=batch_len - num_steps,
                                  dtype=tf.int32)
            x = tf.strided_slice(data, [0, i], [batch_size, i + num_steps])
            y = tf.strided_slice(data, [0, i + 1],
                                 [batch_size, i + num_steps + 1])
        else:
            i = tf.train.range_input_producer(
                epoch_size, shuffle=shuffle).dequeue()
            x = tf.strided_slice(data, [0, i * num_steps],
                                 [batch_size, (i + 1) * num_steps])
            y = tf.strided_slice(data, [0, i * num_steps + 1],
                                 [batch_size, (i + 1) * num_steps + 1])
        x.set_shape([batch_size, num_steps])
        y.set_shape([batch_size, num_steps])

        return x, y, num_batches_per_epoch
def __init__(self, batch_env, step, is_training, should_log, config):
    """Create an instance of the PPO algorithm.

    Sets up the observation and reward normalizers, the episode memories, the
    network variables, the variables holding the previous step's outputs, the
    KL penalty variable and the two optimizers.

    Args:
      batch_env: In-graph batch environment.
      step: Integer tensor holding the current training step.
      is_training: Boolean tensor for whether the algorithm should train.
      should_log: Boolean tensor for whether summaries should be returned.
      config: Object containing the agent configuration as attributes.
    """
    self._batch_env = batch_env
    self._step = step
    self._is_training = is_training
    self._should_log = should_log
    self._config = config

    env = self._batch_env
    self._observ_filter = normalize.StreamingNormalize(
        env.observ[0],
        center=True,
        scale=True,
        clip=5,
        name='normalize_observ')
    self._reward_filter = normalize.StreamingNormalize(
        env.reward[0],
        center=False,
        scale=True,
        clip=10,
        name='normalize_reward')

    # One template entry per stored quantity, in the order
    # observ, action, mean, logstd, reward.
    template = (
        env.observ[0],
        env.action[0],
        env.action[0],
        env.action[0],
        env.reward[0],
    )
    self._memory = memory.EpisodeMemory(
        template, config.update_every, config.max_length, 'memory')
    self._memory_index = tf.Variable(0, trainable=False)

    if self._config.use_gpu and utility.available_gpus():
        device = '/gpu:0'
    else:
        device = '/cpu:0'
    with tf.device(device):
        # Build the network once so that later calls can reuse its variables.
        self._network(
            tf.zeros_like(env.observ)[:, None],
            tf.ones(len(env)),
            reuse=None)
        cell = self._config.network(env.action.shape[1].value)
        with tf.variable_scope('ppo_temporary'):
            self._episodes = memory.EpisodeMemory(
                template, len(batch_env), config.max_length, 'episodes')
            self._last_state = utility.create_nested_vars(
                cell.zero_state(len(batch_env), tf.float32))
            # Non-trainable, action-shaped buffers for the previous step.
            self._last_action = tf.Variable(
                tf.zeros_like(env.action),
                trainable=False,
                name='last_action')
            self._last_mean = tf.Variable(
                tf.zeros_like(env.action),
                trainable=False,
                name='last_mean')
            self._last_logstd = tf.Variable(
                tf.zeros_like(env.action),
                trainable=False,
                name='last_logstd')

    self._penalty = tf.Variable(
        self._config.kl_init_penalty, trainable=False, dtype=tf.float32)
    self._policy_optimizer = self._config.policy_optimizer(
        self._config.policy_lr, name='policy_optimizer')
    self._value_optimizer = self._config.value_optimizer(
        self._config.value_lr, name='value_optimizer')
def train(args, build_train_graph):
    """Trains the model.

    Builds the input pipeline from the files matching `args.train_glob`, asks
    `build_train_graph` for the training graph, records the run configuration
    in a per-run directory and then runs the training op inside a
    `MonitoredTrainingSession` until `args.last_step` is reached.

    Args:
      args: Namespace-like object. The attributes read here are `verbose`,
        `train_glob`, `preprocess_threads`, `patchsize`, `batchsize`,
        `checkpoint_dir`, `logdir`, `last_step`, `save_summary_secs` and
        `save_checkpoint_secs`; `vars(args)` is also dumped to disk and used
        to derive the run name.
      build_train_graph: Callable taking `(args, x)`, where `x` is a batch of
        training patches, and returning a dict with at least the keys
        'train_loss', 'train_op' and 'model_name'. Further entries are turned
        into summaries when `args.logdir` is non-empty: scalar summaries for
        keys containing 'bpp' or 'loss' and for 'mse'/'psnr', image summaries
        for 'original' and 'reconstruction'.

    Raises:
      RuntimeError: if `args.train_glob` matches no files.
    """
    import datetime
    import json
    from shutil import copy

    tf.logging.set_verbosity(
        tf.logging.INFO if args.verbose else tf.logging.ERROR)

    # Create input data pipeline.
    with tf.device("/cpu:0"):
        train_files = glob.glob(args.train_glob)
        if not train_files:
            raise RuntimeError(
                "No training images found with glob '{}'.".format(
                    args.train_glob))
        train_dataset = tf.data.Dataset.from_tensor_slices(train_files)
        train_dataset = train_dataset.shuffle(
            buffer_size=len(train_files)).repeat()
        if 'npy' in args.train_glob:
            # Read numpy arrays directly instead of decoding images.
            # https://stackoverflow.com/a/49459838
            train_dataset = train_dataset.map(
                lambda item: tuple(
                    tf.numpy_function(read_npy_file_helper, [item],
                                      [tf.float32, ])),
                num_parallel_calls=args.preprocess_threads)
        else:
            train_dataset = train_dataset.map(
                read_png, num_parallel_calls=args.preprocess_threads)
        train_dataset = train_dataset.map(
            lambda x: tf.random_crop(x, (args.patchsize, args.patchsize, 3)))
        train_dataset = train_dataset.batch(args.batchsize)
        train_dataset = train_dataset.prefetch(32)

    # Get training patch from dataset.
    x = train_dataset.make_one_shot_iterator().get_next()
    res = build_train_graph(args, x)
    train_loss = res['train_loss']
    train_op = res['train_op']
    model_name = res['model_name']

    # Boilerplate for logging: one directory per run configuration.
    runname = get_runname(
        vars(args),
        record_keys=('num_filters', 'num_hfilters', 'lmbda'),
        prefix=model_name)
    save_dir = os.path.join(args.checkpoint_dir, runname)
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(save_dir, exist_ok=True)

    # Append a timestamped, human-readable record of every launch.
    with open(os.path.join(save_dir, 'record.txt'), 'a') as f:
        f.write(datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") + '\n')
        f.write(json.dumps(vars(args), indent=4, sort_keys=True) + '\n')
        f.write('\n')
    # The JSON copy is overwritten on every launch.
    with open(os.path.join(save_dir, 'args.json'), 'w') as f:
        json.dump(vars(args), f, indent=4, sort_keys=True)

    # Save a copy of the script that defined the model. The path is relative,
    # so it is resolved against the current working directory.
    copied_path = copy(model_name + '.py', save_dir)
    print('Saved a copy of %s.py to %s' % (model_name, copied_path))

    hooks = [
        tf.train.StopAtStepHook(last_step=args.last_step),
        tf.train.NanTensorHook(train_loss),
    ]

    save_summary_secs = args.save_summary_secs
    if args.logdir != '':
        for key in res:
            if 'bpp' in key or 'loss' in key or key in ('mse', 'psnr'):
                tf.summary.scalar(key, res[key])
            elif key in ('original', 'reconstruction'):
                tf.summary.image(key, res[key], max_outputs=2)

        summary_op = tf.summary.merge_all()
        tf_log_dir = os.path.join(args.logdir, runname)
        summary_hook = tf.train.SummarySaverHook(
            save_secs=save_summary_secs,
            output_dir=tf_log_dir,
            summary_op=summary_op)
        hooks.append(summary_hook)

    with tf.train.MonitoredTrainingSession(
            hooks=hooks,
            checkpoint_dir=save_dir,
            save_checkpoint_secs=args.save_checkpoint_secs,
            save_summaries_secs=save_summary_secs) as sess:
        while not sess.should_stop():
            sess.run(train_op)
def main(unused_argv=None):
    """Synthesize audio for every input file, one batch at a time.

    `FLAGS.source_path` is either a directory or a single `.wav`/`.npy` file.
    `.wav` inputs are encoded with the checkpoint first; `.npy` inputs are
    loaded as encodings. Each input `<name>.<ext>` is written to
    `<save_path>/gen_<name>.wav`.

    Args:
      unused_argv: Ignored.

    Raises:
      ValueError: if no save path is given, or if the source path is neither
        a directory nor a `.wav`/`.npy` file.
      RuntimeError: if the source directory holds no `.wav` or `.npy` file.
    """
    os.environ["CUDA_VISIBLE_DEVICES"] = str(FLAGS.gpu_number)
    source_path = utils.shell_path(FLAGS.source_path)
    checkpoint_path = utils.shell_path(FLAGS.checkpoint_path)
    save_path = utils.shell_path(FLAGS.save_path)
    if not save_path:
        raise ValueError("Must specify a save_path.")
    tf.logging.set_verbosity(FLAGS.log)

    if tf.gfile.IsDirectory(source_path):
        # Directory input: prefer .wav over .npy unless npy_only is set.
        listing = tf.gfile.ListDirectory(source_path)
        seen_extensions = {os.path.splitext(entry)[1] for entry in listing}
        if ".wav" in seen_extensions:
            file_extension = ".wav"
        elif ".npy" in seen_extensions:
            file_extension = ".npy"
        else:
            raise RuntimeError("Folder must contain .wav or .npy files.")
        if FLAGS.npy_only:
            file_extension = ".npy"
        files = sorted(
            os.path.join(source_path, entry)
            for entry in listing
            if entry.lower().endswith(file_extension))
    elif source_path.lower().endswith((".wav", ".npy")):
        # Single-file input.
        file_extension = os.path.splitext(source_path.lower())[1]
        files = [source_path]
    else:
        raise ValueError(
            "source_path {} must be a folder or file.".format(source_path))

    batch_size = FLAGS.batch_size
    sample_length = FLAGS.sample_length
    for offset in range(0, len(files), batch_size):
        batch_files = files[offset:offset + batch_size]
        save_names = []
        for path in batch_files:
            stem = os.path.splitext(os.path.basename(path))[0]
            save_names.append(os.path.join(save_path, "gen_" + stem + ".wav"))

        if file_extension == ".wav":
            # Encode the waveforms with the checkpoint.
            batch_data = fastgen.load_batch_audio(
                batch_files, sample_length=sample_length)
            encodings = fastgen.encode(
                batch_data, checkpoint_path, sample_length=sample_length)
        else:
            # The files already hold encodings.
            encodings = fastgen.load_batch_encodings(
                batch_files, sample_length=sample_length)

        if FLAGS.gpu_number == 0:
            # Default placement.
            fastgen.synthesize(
                encodings, save_names, checkpoint_path=checkpoint_path)
        else:
            # Pin synthesis to the requested GPU.
            with tf.device("/device:GPU:%d" % FLAGS.gpu_number):
                fastgen.synthesize(
                    encodings, save_names, checkpoint_path=checkpoint_path)
def provide_one_hot_labels(self, batch_size):
    """Returns a batch of one-hot labels.

    The request is delegated to `self.dataset`, with the ops created under
    the 'inputs' name scope and placed on the CPU.

    Args:
      batch_size: Forwarded to the dataset's `provide_one_hot_labels`.

    Returns:
      Whatever the dataset's `provide_one_hot_labels` returns.
    """
    with tf.name_scope('inputs'), tf.device('/cpu:0'):
        one_hot_labels = self.dataset.provide_one_hot_labels(
            batch_size=batch_size)
    return one_hot_labels
def main(args):
    """Check that the sparse and dense transformers agree on the IPU.

    A sparse transformer is built and run on random activations, and its
    weights are checkpointed in dense form. A dense transformer is then built,
    its trainable variables are restored from that checkpoint, and it is run
    on the same activations. Output activations, input gradients and the
    per-layer weight gradients of the two runs are compared.

    Args:
      args: Options object. The attributes read here are `random_seed`,
        `batch_size`, `source_sequence_length`, `hidden_length`, `profile`
        and `dtype`; the object is also handed to both transformer
        constructors.

    Returns:
      The pair `(sparse_result, dense_result)` of fetched result dicts.

    Raises:
      AssertionError: from `np.testing.assert_allclose` when any compared
        quantity differs by more than the tolerance.
    """
    tf.logging.set_verbosity(tf.logging.ERROR)
    np.set_printoptions(linewidth=200)
    seed = args.random_seed
    # The checkpoint lives in a fresh temporary directory (not removed here).
    checkpoint_path = os.path.join(tempfile.mkdtemp(), "model.ckpt")

    # Input activations for the attention layer.
    input_shape = (args.batch_size, args.source_sequence_length,
                   args.hidden_length)
    activations_np = np.random.default_rng(seed=seed).uniform(
        -0.1, 0.1, size=input_shape)

    # Configure the IPU.
    ipu_options = ipu.utils.create_ipu_config(
        profiling=args.profile, report_directory="./report/")
    ipu_options = ipu.utils.auto_select_ipus(ipu_options, 1)
    ipu.utils.configure_ipu_system(ipu_options)

    # ---- Sparse model -----------------------------------------------------
    sparse_decoder_graph = tf.Graph()
    sparse_transformer = DynsparseTransformer(args)
    with sparse_decoder_graph.as_default():
        with tf.device("cpu"):
            # Placeholder for the activations; weight placeholders are
            # created inside the sparse transformer.
            inputs_ph = tf.placeholder(args.dtype, activations_np.shape)
        with ipu.scopes.ipu_scope("/device:IPU:0"):
            sparse_decoder = partial(sparse_transformer_fwd_and_grad,
                                     sparse_transformer)
            sparse_decoder_fetches = ipu.ipu_compiler.compile(
                sparse_decoder, [inputs_ph])
            ipu.utils.move_variable_initialization_to_cpu()

    with tf.Session(graph=sparse_decoder_graph) as sess:
        # Initialize the weights.
        sess.run(tf.global_variables_initializer())
        # Save the sparse weights to the checkpoint as dense.
        sparse_transformer.checkpointAsDense(checkpoint_path)
        # Run the sparse decoder.
        sparse_result = sess.run(
            sparse_decoder_fetches, feed_dict={inputs_ph: activations_np})

    # ---- Dense model ------------------------------------------------------
    # The dense transformer starts from the values the sparse model was
    # originally initialized with.
    dense_decoder_graph = tf.Graph()
    dense_transformer = DenseTransformer(args)
    with dense_decoder_graph.as_default():
        with tf.device("cpu"):
            # Placeholder for the activations; the weights are streamed from
            # the checkpoint.
            inputs_ph = tf.placeholder(args.dtype, activations_np.shape)
        with ipu.scopes.ipu_scope("/device:IPU:0"):
            dense_decoder = partial(dense_transformer_fwd_and_grad,
                                    dense_transformer)
            dense_decoder_fetches = ipu.ipu_compiler.compile(
                dense_decoder, [inputs_ph])
            ipu.utils.move_variable_initialization_to_cpu()
        with tf.device("cpu"):
            # Only the trainable variables are loaded, not momentum etc.
            loader = tf.train.Saver(tf.trainable_variables())

    with tf.Session(graph=dense_decoder_graph) as sess:
        # Initialize the momentums, which are not part of the checkpoint.
        sess.run(tf.global_variables_initializer())
        # Restore the saved trainable variables.
        loader.restore(sess, checkpoint_path)
        dense_result = sess.run(
            dense_decoder_fetches, feed_dict={inputs_ph: activations_np})

    # ---- Comparison -------------------------------------------------------
    # Half precision gets looser tolerances.
    if args.dtype == tf.float16:
        tolerances = {"atol": 1e-02, "rtol": 1e-04}
    else:
        tolerances = {"atol": 1e-05, "rtol": 1e-05}

    # Model output activations (actual vs. desired) -> (sparse vs. dense).
    np.testing.assert_allclose(
        sparse_result["output_activation"],
        dense_result["output_activation"],
        err_msg="Output activations do not match.",
        **tolerances)

    # Gradient of the output wrt. the input.
    np.testing.assert_allclose(
        sparse_result["input_grad"],
        dense_result["input_grad"],
        err_msg="Grads wrt. inputs do not match",
        **tolerances)

    # Dense-weight grads and sparse grads of every sparse layer.
    for name, sparse_layer in sparse_transformer.sparse_layers.items():
        # Dense gradient from both models.
        dense_grad = dense_result[name + "/weight" + "_grad"]
        np.testing.assert_allclose(
            sparse_result[name + "_grad_w"],
            dense_grad,
            err_msg=f"Dense grads for layer {name} do not match",
            **tolerances)

        # Recover (row, col, value) triplets from the padded sparse gradient.
        layer_weights = sparse_layer.weights
        padded_grad = sparse_result[name + "/sparse_layer/nz_values_grad"]
        grad_repr = sparse.SparseRepresentation(
            layer_weights.get_metainfo(), padded_grad)
        i, j, sparse_grad = sparse.triplets_from_representation(
            layer_weights.spec, grad_repr, layer_weights.matmul_options)

        # View the dense gradient as an (nx, ny) grid of square blocks.
        block_size, _ = sparse_layer.get_nonzero_blocks_shape()
        nx = dense_grad.shape[0] // block_size
        ny = dense_grad.shape[1] // block_size
        element_strides = np.array(dense_grad.strides)  # in bytes
        block_strides = (tuple(element_strides * block_size) +
                         tuple(element_strides))
        blocked_dense_grad = np.lib.stride_tricks.as_strided(
            dense_grad, (nx, ny, block_size, block_size), block_strides)
        if block_size == 1:
            # Element-wise sparsity: drop the two trailing unit block axes.
            blocked_dense_grad = np.squeeze(
                np.copy(blocked_dense_grad), axis=(-2, -1))

        np.testing.assert_allclose(
            sparse_grad,
            blocked_dense_grad[i, j],
            err_msg=f"Sparse grads for layer {name} do not match",
            **tolerances)

    print("All results match.")
    return sparse_result, dense_result
def train_eval_offline(
    # Basic args.
    log_dir,
    data_file,
    agent_module,
    env_name='HalfCheetah-v2',
    n_train=int(1e6),
    shuffle_steps=0,
    seed=0,
    use_seed_for_data=False,
    # Train and eval args.
    total_train_steps=int(1e6),
    summary_freq=100,
    print_freq=1000,
    save_freq=int(2e4),
    eval_freq=5000,
    n_eval_episodes=20,
    # Agent args.
    model_params=(((200, 200), ), 2),
    optimizers=(('adam', 0.001), ),
    batch_size=256,
    weight_decays=(0.0, ),
    update_freq=1,
    update_rate=0.005,
    discount=0.99,
):
    """Training a policy with a fixed dataset.

    Args:
      log_dir: Directory under which the agent checkpoint ('agent') and the
        'train' / 'eval' summaries are written. An existing agent checkpoint
        there is restored before training.
      data_file: Checkpoint from which the dataset capacity and contents are
        restored.
      agent_module: Module exposing `Config` and `Agent`.
      env_name: Name handed to `train_eval_utils.env_factory`.
      n_train: Upper bound on the number of samples used for training; capped
        at the dataset size.
      shuffle_steps: Forwarded to `utils.shuffle_indices_with_steps`.
      seed: Seed of the index shuffle when `use_seed_for_data` is set.
      use_seed_for_data: If false, the index shuffle is always seeded with 0.
      total_train_steps: Training stops once the agent's global step reaches
        this value.
      summary_freq: Write train summaries every this many steps.
      print_freq: Print train info every this many steps.
      save_freq: Save the agent every this many steps.
      eval_freq: Evaluate the test policies every this many steps.
      n_eval_episodes: Number of episodes per evaluation.
      model_params: Forwarded to the agent config.
      optimizers: Forwarded to the agent config.
      batch_size: Forwarded to the agent config.
      weight_decays: Forwarded to the agent config.
      update_freq: Forwarded to the agent config.
      update_rate: Forwarded to the agent config.
      discount: Forwarded to the agent config.

    Returns:
      `np.array` with one row `[step] + eval_result` per evaluation.
    """
    # The environment is needed for its specs and for evaluation.
    tf_env = train_eval_utils.env_factory(env_name)
    observation_spec = tf_env.observation_spec()
    action_spec = tf_env.action_spec()

    # Load the full dataset from its checkpoint.
    logging.info('Loading data from %s ...', data_file)
    data_size = utils.load_variable_from_ckpt(data_file, 'data._capacity')
    with tf.device('/cpu:0'):
        full_data = dataset.Dataset(observation_spec, action_spec, data_size)
    tf.train.Checkpoint(data=full_data).restore(data_file)

    # Take the first n_train entries of a shuffled index list.
    n_train = min(n_train, full_data.size)
    logging.info('n_train %s.', n_train)
    rand = np.random.RandomState(seed if use_seed_for_data else 0)
    shuffled_indices = utils.shuffle_indices_with_steps(
        n=full_data.size, steps=shuffle_steps, rand=rand)
    train_data = full_data.create_view(shuffled_indices[:n_train])

    # Create the agent.
    agent_flags = utils.Flags(
        observation_spec=observation_spec,
        action_spec=action_spec,
        model_params=model_params,
        optimizers=optimizers,
        batch_size=batch_size,
        weight_decays=weight_decays,
        update_freq=update_freq,
        update_rate=update_rate,
        discount=discount,
        train_data=train_data)
    agent_args = agent_module.Config(agent_flags).agent_args
    agent = agent_module.Agent(**vars(agent_args))

    # Resume from an existing agent checkpoint, if any.
    agent_ckpt_name = os.path.join(log_dir, 'agent')
    if tf.io.gfile.exists('{}.index'.format(agent_ckpt_name)):
        logging.info('Checkpoint found at %s.', agent_ckpt_name)
        agent.restore(agent_ckpt_name)

    # One summary writer for training and one per evaluated policy.
    make_writer = tf.compat.v2.summary.create_file_writer
    train_summary_writer = make_writer(os.path.join(log_dir, 'train'))
    eval_summary_dir = os.path.join(log_dir, 'eval')
    eval_summary_writers = collections.OrderedDict(
        (policy_key, make_writer(os.path.join(eval_summary_dir, policy_key)))
        for policy_key in agent.test_policies.keys())

    eval_results = []
    time_st_total = time.time()
    time_st = time.time()
    step = agent.global_step
    timed_at_step = step
    while step < total_train_steps:
        agent.train_step()
        step = agent.global_step
        # The final step always triggers summary, print and evaluation.
        is_last_step = step == total_train_steps

        if is_last_step or step % summary_freq == 0:
            agent.write_train_summary(train_summary_writer)
        if is_last_step or step % print_freq == 0:
            agent.print_train_info()
        if is_last_step or step % eval_freq == 0:
            # Throughput since the previous evaluation.
            elapsed = time.time() - time_st
            logging.info('Training at %.4g steps/s.',
                         (step - timed_at_step) / elapsed)
            eval_result, eval_infos = train_eval_utils.eval_policies(
                tf_env, agent.test_policies, n_eval_episodes)
            eval_results.append([step] + eval_result)
            logging.info('Testing at step %d:', step)
            for policy_key, policy_info in eval_infos.items():
                logging.info(
                    utils.get_summary_str(
                        step=None, info=policy_info,
                        prefix=policy_key + ': '))
                utils.write_summary(eval_summary_writers[policy_key], step,
                                    policy_info)
            time_st = time.time()
            timed_at_step = step
        if step % save_freq == 0:
            agent.save(agent_ckpt_name)
            logging.info('Agent saved at %s.', agent_ckpt_name)

    # Always save once more when training ends.
    agent.save(agent_ckpt_name)
    logging.info('Training finished, time cost %.4gs.',
                 time.time() - time_st_total)
    return np.array(eval_results)
def test_train(args):
    """Trains the hyperprior model starting from a pre-trained checkpoint.

    The latents are multiplied by a random keep-mask (drop rate sampled
    uniformly from [0, 0.75)) and rescaled before being decoded.

    The pre-trained weights in "./tfc256-05" are restored through a
    `tf.train.Scaffold` `init_fn`. Restoring them in a separate `tf.Session`
    has no effect on training, because variable values belong to the session
    that loaded them and `MonitoredTrainingSession` opens a new one. The
    `init_fn` runs only when `args.checkpoint_dir` holds no checkpoint to
    resume from.

    Args:
        args: Parsed command-line arguments. Reads `verbose`, `train_glob`,
            `preprocess_threads`, `patchsize`, `batchsize`, `num_filters`,
            `lmbda`, `last_step` and `checkpoint_dir`.

    Raises:
        RuntimeError: If `args.train_glob` matches no files.
        ValueError: If no pre-trained checkpoint is found.
    """
    if args.verbose:
        tf.logging.set_verbosity(tf.logging.INFO)

    # Create input data pipeline.
    with tf.device("/cpu:0"):
        train_files = glob.glob(args.train_glob)
        if not train_files:
            raise RuntimeError(
                "No training images found with glob '{}'.".format(
                    args.train_glob))
        train_dataset = tf.data.Dataset.from_tensor_slices(train_files)
        train_dataset = train_dataset.shuffle(
            buffer_size=len(train_files)).repeat()
        train_dataset = train_dataset.map(
            read_png, num_parallel_calls=args.preprocess_threads)
        train_dataset = train_dataset.map(
            lambda x: tf.random_crop(x, (args.patchsize, args.patchsize, 3)))
        train_dataset = train_dataset.batch(args.batchsize)
        train_dataset = train_dataset.prefetch(32)

    num_pixels = args.batchsize * args.patchsize**2

    # Get training patch from dataset.
    x = train_dataset.make_one_shot_iterator().get_next()

    # Instantiate model.
    analysis_transform = AnalysisTransform(args.num_filters)
    synthesis_transform = SynthesisTransform(args.num_filters)
    hyper_analysis_transform = HyperAnalysisTransform(args.num_filters)
    hyper_synthesis_transform = HyperSynthesisTransform(args.num_filters)
    entropy_bottleneck = DynamicEntropyBottleneck(name="entropy_bottleneck")

    # Build autoencoder and hyperprior.
    y = analysis_transform(x)
    z = hyper_analysis_transform(abs(y))
    z_tilde, z_likelihoods = entropy_bottleneck(z, training=True)
    sigma = hyper_synthesis_transform(z_tilde)
    scale_table = np.exp(
        np.linspace(np.log(SCALES_MIN), np.log(SCALES_MAX), SCALES_LEVELS))
    conditional_bottleneck = tfc.GaussianConditional(sigma, scale_table)
    y_tilde, y_likelihoods = conditional_bottleneck(y, training=True)

    # Random dropout on the latents: one keep/drop decision per entry of the
    # last axis, rescaled by 1 / keep_prob.
    # NOTE(review): the mask length is hard-coded to 256, which presumably
    # must equal args.num_filters -- confirm before using other widths.
    rand_rate = tf.random_uniform([], minval=0.0, maxval=0.75)  # drop rate
    random_tensor = tf.random_uniform([256], dtype=tf.float32)
    keep_prob = 1 - rand_rate
    scale = 1 / keep_prob
    keep_mask = random_tensor >= rand_rate
    y_tilde_drop = y_tilde * scale * tf.cast(keep_mask, tf.float32)
    x_tilde = synthesis_transform(y_tilde_drop)

    # Total number of bits divided by number of pixels.
    train_bpp = (tf.reduce_sum(tf.log(y_likelihoods)) + tf.reduce_sum(
        tf.log(z_likelihoods))) / (-np.log(2) * num_pixels)

    # Mean squared error across pixels.
    train_mse = tf.reduce_mean(tf.squared_difference(x, x_tilde))
    # Multiply by 255^2 to correct for rescaling.
    train_mse *= 255**2

    # The rate-distortion cost.
    train_loss = args.lmbda * train_mse + train_bpp

    # Locate the pre-trained weights. The saver is created before the global
    # step and the optimizers exist, so it covers only variables defined so
    # far.
    pretrained_ckpt = tf.train.latest_checkpoint(checkpoint_dir="./tfc256-05")
    if pretrained_ckpt is None:
        raise ValueError(
            "No pre-trained checkpoint found in './tfc256-05'.")
    pretrained_saver = tf.train.Saver()

    def restore_pretrained(scaffold, session):
        """Loads the pre-trained weights into the training session."""
        del scaffold  # Unused; required by the Scaffold init_fn signature.
        pretrained_saver.restore(session, save_path=pretrained_ckpt)

    # Minimize loss and auxiliary loss, and execute update op.
    step = tf.train.create_global_step()
    main_optimizer = tf.train.AdamOptimizer(learning_rate=1e-4)
    aux_optimizer = tf.train.AdamOptimizer(learning_rate=1e-3)
    main_step = main_optimizer.minimize(train_loss, global_step=step)
    aux_step = aux_optimizer.minimize(entropy_bottleneck.losses[0])
    train_op = tf.group(main_step, aux_step, entropy_bottleneck.updates[0])

    tf.summary.scalar("loss", train_loss)
    tf.summary.scalar("bpp", train_bpp)
    tf.summary.scalar("mse", train_mse)
    tf.summary.image("original", quantize_image(x))
    tf.summary.image("reconstruction", quantize_image(x_tilde))

    hooks = [
        tf.train.StopAtStepHook(last_step=args.last_step),
        tf.train.NanTensorHook(train_loss),
    ]
    with tf.train.MonitoredTrainingSession(
            hooks=hooks,
            checkpoint_dir=args.checkpoint_dir,
            scaffold=tf.train.Scaffold(init_fn=restore_pretrained),
            save_checkpoint_secs=300,
            save_summaries_secs=60) as sess:
        while not sess.should_stop():
            sess.run(train_op)
import time

import tensorflow.compat.v1 as tf

# Cluster configuration: two workers and one parameter server.
worker_hosts = ["9.134.80.230:9501", "9.134.189.246:9501"]
ps_hosts = ["9.134.189.246:9500"]
cluster = tf.train.ClusterSpec({"worker": worker_hosts, "ps": ps_hosts})

# This process runs as task 1 of the "worker" job, i.e. the second entry of
# worker_hosts.
task_index = 1
server = tf.train.Server(cluster, job_name='worker', task_index=task_index)

# The cluster must be passed explicitly: without it replica_device_setter()
# sees zero ps tasks and returns None, so `w` would stay on this worker
# instead of being placed on the parameter server.
with tf.device(
        tf.train.replica_device_setter(
            worker_device="/job:worker/task:%d" % task_index,
            cluster=cluster)):
    w = tf.get_variable('w', [1], tf.float32,
                        initializer=tf.constant_initializer(2))
    add = tf.add(w, 1)
    update = tf.assign(w, add)

with tf.Session(server.target) as sess:
    sess.run(tf.global_variables_initializer())
    # Print the variable, then increment it, once per second.
    for _ in range(100):
        print("==============================")
        print(sess.run(w))
        print(sess.run(update))
        time.sleep(1)
def train(train_dir,
          config,
          dataset_fn,
          checkpoints_to_keep=5,
          keep_checkpoint_every_n_hours=1,
          num_steps=None,
          master='',
          num_sync_workers=0,
          num_ps_tasks=0,
          task=0):
    """Builds the training graph and runs the training loop.

    The model from `config` is built, its gradients are clipped according to
    `config.hparams.clip_mode` ('value' or 'global_norm'), and the variables
    selected by `config.var_train_pattern` are initialised from
    `config.pretrained_path` before `contrib_training.train` takes over.

    Raises:
        ValueError: If `config.hparams.clip_mode` is not a known mode.
    """
    tf.gfile.MakeDirs(train_dir)
    is_chief = task == 0
    if is_chief:
        _trial_summary(config.hparams,
                       config.train_examples_path or config.tfds_name,
                       train_dir)

    with tf.Graph().as_default(), tf.device(
            tf.train.replica_device_setter(num_ps_tasks, merge_devices=True)):
        hparams = config.hparams
        model = config.model
        model.build(hparams,
                    config.data_converter.output_depth,
                    encoder_train=config.encoder_train,
                    decoder_train=config.decoder_train)

        optimizer = model.train(**_get_input_tensors(dataset_fn(), config))
        restored_vars = _get_restore_vars(config.var_train_pattern)
        _set_trainable_vars(config.var_train_pattern)

        hooks = []
        if num_sync_workers:
            optimizer = tf.train.SyncReplicasOptimizer(optimizer,
                                                       num_sync_workers)
            hooks.append(optimizer.make_session_run_hook(is_chief))

        grads, var_list = zip(*optimizer.compute_gradients(model.loss))
        global_norm = tf.global_norm(grads)
        tf.summary.scalar('global_norm', global_norm)

        clip_mode = hparams.clip_mode
        if clip_mode == 'value':
            limit = hparams.grad_clip
            clipped_grads = [
                tf.clip_by_value(grad, -limit, limit) for grad in grads
            ]
        elif clip_mode == 'global_norm':

            def _clip_to_norm():
                return tf.clip_by_global_norm(grads,
                                              hparams.grad_clip,
                                              use_norm=global_norm)[0]

            def _zero_out():
                return [tf.zeros(tf.shape(grad)) for grad in grads]

            # Gradients whose norm reaches the threshold are replaced by
            # zeros rather than clipped.
            clipped_grads = tf.cond(
                global_norm < hparams.grad_norm_clip_to_zero, _clip_to_norm,
                _zero_out)
        else:
            raise ValueError('Unknown clip_mode: {}'.format(clip_mode))

        train_op = optimizer.apply_gradients(zip(clipped_grads, var_list),
                                             global_step=model.global_step,
                                             name='train_step')

        hooks.append(
            tf.train.LoggingTensorHook(
                {
                    'global_step': model.global_step,
                    'loss': model.loss
                },
                every_n_iter=5))
        if num_steps:
            hooks.append(tf.train.StopAtStepHook(last_step=num_steps))

        # Initialise the selected variables from the pre-trained checkpoint.
        restore_names = [v.name for v in restored_vars]
        variables_to_restore = contrib_framework.get_variables_to_restore(
            include=restore_names)
        init_assign_op, init_feed_dict = (
            contrib_framework.assign_from_checkpoint(config.pretrained_path,
                                                     variables_to_restore))

        def _init_from_pretrained(scaffold, sess):
            sess.run(init_assign_op, init_feed_dict)

        checkpoint_saver = tf.train.Saver(
            max_to_keep=checkpoints_to_keep,
            keep_checkpoint_every_n_hours=keep_checkpoint_every_n_hours,
        )
        scaffold = tf.train.Scaffold(init_fn=_init_from_pretrained,
                                     saver=checkpoint_saver)
        contrib_training.train(train_op=train_op,
                               logdir=train_dir,
                               scaffold=scaffold,
                               hooks=hooks,
                               save_checkpoint_secs=60,
                               master=master,
                               is_chief=is_chief)
def build_graph(model, hparams, scope=None):
    """Builds the encoders, decoders, value networks and losses on `model`.

    Stores both turn logits and both dialogue value tensors as attributes of
    `model`. Losses are only built for the TRAIN, EVAL and mutable self-play
    modes; for PREDICT and immutable self-play they are returned as None.

    Returns:
        A tuple (sl_loss, sl_loss_arr, rl_loss_arr, sample_id_arr_train,
        sample_id_arr_infer).

    Raises:
        ValueError: If `model.mode` is none of the handled modes.
    """
    utils.print_out("# creating %s graph ..." % model.mode)
    num_layers = hparams.num_layers
    num_gpus = hparams.num_gpus
    iterator = model.iterator

    with tf.variable_scope(scope or "dynamic_seq2seq", dtype=tf.float32):
        # ---- Encoders ----
        # Embedded dialogue and action tokens. Axis 1 is used as the time
        # axis further down, when the auxiliary states are tiled.
        with tf.variable_scope("encoder_emb_inp"):
            encoder_emb_inp = tf.nn.embedding_lookup(model.embedding_encoder,
                                                     iterator.source)
            action_emb_inp = tf.nn.embedding_lookup(model.embedding_encoder,
                                                    iterator.action)

        with tf.variable_scope("encoder1_intent"):
            _, encoder_state1_aux, _ = _build_encoder_simple(
                model,
                iterator.intent,
                iterator.intent_len,
                num_units=hparams.encoder_intent_unit)

        with tf.variable_scope("encoder2_kb"):
            _, encoder_state2_aux, _ = _build_encoder_hierarchial(
                model, iterator.kb, num_units=hparams.encoder_kb_unit)

        # Encoder 1: token embeddings concatenated with the intent state
        # repeated at every time step.
        with tf.variable_scope("encoder1"):
            model.encoder_input_projection1 = tf.layers.Dense(
                hparams.num_units,
                use_bias=False,
                name="encoder_1_input_projection")
            intent_state = tf.reshape(
                encoder_state1_aux,
                [model.batch_size, 1, hparams.encoder_intent_unit])
            time_step = tf.shape(encoder_emb_inp)[1]
            intent_state = tf.tile(intent_state, [1, time_step, 1])
            concat1 = tf.concat([encoder_emb_inp, intent_state], 2)
            encoder1_input = model.encoder_input_projection1(concat1)
            encoder_outputs1, encoder_state1 = _build_encoder(
                model, encoder1_input, hparams)  # 1= customer, 2= agent

        # Encoder 2: same construction, using the knowledge-base state.
        with tf.variable_scope("encoder2"):
            model.encoder_input_projection2 = tf.layers.Dense(
                hparams.num_units,
                use_bias=False,
                name="encoder_2_input_projection")
            kb_state = tf.reshape(
                encoder_state2_aux,
                [model.batch_size, 1, hparams.encoder_kb_unit])
            time_step = tf.shape(encoder_emb_inp)[1]
            kb_state = tf.tile(kb_state, [1, time_step, 1])
            concat2 = tf.concat([encoder_emb_inp, kb_state], 2)
            encoder2_input = model.encoder_input_projection2(concat2)
            encoder_outputs2, encoder_state2 = _build_encoder(
                model, encoder2_input, hparams)

        # ---- Decoders ----
        with tf.variable_scope("decoder1"):
            (logits_trian1, _, sample_id_train1,
             sample_id_infer1) = _build_decoder(
                 model, encoder_outputs1, encoder_state1, hparams,
                 vocab_utils.start_of_turn1, vocab_utils.start_of_turn2,
                 model.output_layer1, encoder_state1_aux)

        with tf.variable_scope("decoder2"):
            (logits_trian2, _, sample_id_train2,
             sample_id_infer2) = _build_decoder(
                 model, encoder_outputs2, encoder_state2, hparams,
                 vocab_utils.start_of_turn2, vocab_utils.start_of_turn1,
                 model.output_layer2, encoder_state2_aux)

        with tf.variable_scope("decoder_action"):
            (logits_trian3, _, sample_id_train3,
             sample_id_infer3) = _build_decoder_action(
                 model,
                 encoder_state2,
                 hparams,
                 hparams.t1.encode(),  # dialogue ends with t2, action starts with t1
                 hparams.t2.encode(),
                 model.output_layer_action)

        # ---- Value networks ----
        with tf.variable_scope("value_network1"):
            dialogue1_val, _ = _build_value_network(
                model, encoder_emb_inp, action_emb_inp, encoder_state1_aux,
                model.vn_project11, model.vn_project12, hparams)

        with tf.variable_scope("value_network2"):
            dialogue2_val, action_val = _build_value_network(
                model, encoder_emb_inp, action_emb_inp, encoder_state2_aux,
                model.vn_project21, model.vn_project22, hparams, True)

        model.logits_trian1 = logits_trian1
        model.logits_trian2 = logits_trian2
        model.dialogue1_val = dialogue1_val
        model.dialogue2_val = dialogue2_val

        # ---- Losses ----
        loss_modes = [
            tf.estimator.ModeKeys.TRAIN, tf.estimator.ModeKeys.EVAL,
            dialogue_utils.mode_self_play_mutable
        ]
        if model.mode in loss_modes:
            with tf.device(
                    model_helper.get_device_str(num_layers - 1, num_gpus)):
                sl_loss, sl_loss_arr = _compute_loss(model, logits_trian1,
                                                     logits_trian2,
                                                     logits_trian3)
            with tf.device(
                    model_helper.get_device_str(num_layers - 1, num_gpus)):
                rl_loss_arr = _compute_loss_selfplay(
                    model, logits_trian1, logits_trian2, logits_trian3,
                    dialogue1_val, dialogue2_val, action_val)
        elif model.mode in (tf.estimator.ModeKeys.PREDICT,
                            dialogue_utils.mode_self_play_immutable):
            sl_loss = sl_loss_arr = rl_loss_arr = None
        else:
            raise ValueError("mode not known")

        sample_id_arr_train = [
            sample_id_train1, sample_id_train2, sample_id_train3
        ]
        sample_id_arr_infer = [
            sample_id_infer1, sample_id_infer2, sample_id_infer3
        ]
        return (sl_loss, sl_loss_arr, rl_loss_arr, sample_id_arr_train,
                sample_id_arr_infer)
print( "Profiling enabled, repeat count set to one and executing the program once." ) # Create the data queues from/to IPU infeed_queue = ipu.ipu_infeed_queue.IPUInfeedQueue(dataset, "infeed") outfeed_queue = ipu.ipu_outfeed_queue.IPUOutfeedQueue("outfeed") # With batch size BS, gradient accumulation count GAC and repeat count RPT, # at every step n = (BS * GAC * RPT) examples are used. # So in order to evaluate at least N total examples, do ceil(N / n) steps num_train_examples = int(args.epochs * n_examples) examples_per_step = args.batch_size * args.gradient_accumulation_count * args.repeat_count steps = ((num_train_examples - 1) // examples_per_step) + 1 with tf.device('cpu'): lr = tf.placeholder(np.float32, []) with ipu.scopes.ipu_scope("/device:IPU:0"): compiled_model = ipu.ipu_compiler.compile(model, inputs=[lr]) outfeed_op = outfeed_queue.dequeue() ipu.utils.move_variable_initialization_to_cpu() init_op = tf.global_variables_initializer() # Configure the IPU. # With pipelining, IPU-level profiling is needed to correctly visualise the execution trace. # For pipelined models either SNAKE or HOOF IPU selection orders are advised; # the latter works best when the first and last stage are on the same IPU. # For more information, see the API section of the Targeting the IPU from TensorFlow document:
def main(unused_argv=None):
    """Builds the training graph for the selected config and trains it.

    The config module "wavenet.<FLAGS.config>" supplies the input batch, the
    model graph and a step-indexed learning-rate schedule. Training uses a
    `SyncReplicasOptimizer` around Adam and is driven by
    `slim.learning.train`.

    Raises:
        RuntimeError: If `FLAGS.config` is not set.
    """
    tf.logging.set_verbosity(FLAGS.log)

    if FLAGS.config is None:
        raise RuntimeError("No config name specified.")

    config = utils.get_module("wavenet." + FLAGS.config).Config(
        FLAGS.train_path)

    logdir = FLAGS.logdir
    tf.logging.info("Saving to %s", logdir)

    with tf.Graph().as_default():
        total_batch_size = FLAGS.total_batch_size
        assert total_batch_size % FLAGS.worker_replicas == 0
        # Floor division keeps the per-worker batch size an int on Python 3;
        # the assert above guarantees the division is exact.
        worker_batch_size = total_batch_size // FLAGS.worker_replicas

        # Run the Reader on the CPU
        cpu_device = "/job:localhost/replica:0/task:0/cpu:0"
        if FLAGS.ps_tasks:
            cpu_device = "/job:worker/cpu:0"

        with tf.device(cpu_device):
            inputs_dict = config.get_batch(worker_batch_size)

        with tf.device(
                tf.train.replica_device_setter(ps_tasks=FLAGS.ps_tasks,
                                               merge_devices=True)):
            global_step = tf.get_variable(
                "global_step", [],
                tf.int32,
                initializer=tf.constant_initializer(0),
                trainable=False)

            # Piecewise-constant schedule: after the loop, `lr` is the value
            # of the largest boundary <= global_step. This only holds when
            # boundaries are visited in ascending order, so sort them rather
            # than rely on dict iteration order.
            # pylint: disable=cell-var-from-loop
            lr = tf.constant(config.learning_rate_schedule[0])
            for key, value in sorted(config.learning_rate_schedule.items()):
                lr = tf.cond(tf.less(global_step, key), lambda: lr,
                             lambda: tf.constant(value))
            # pylint: enable=cell-var-from-loop
            tf.summary.scalar("learning_rate", lr)

            # build the model graph
            outputs_dict = config.build(inputs_dict, is_training=True)
            loss = outputs_dict["loss"]
            tf.summary.scalar("train_loss", loss)

            worker_replicas = FLAGS.worker_replicas
            ema = tf.train.ExponentialMovingAverage(decay=0.9999,
                                                    num_updates=global_step)
            opt = tf.train.SyncReplicasOptimizer(
                tf.train.AdamOptimizer(lr, epsilon=1e-8),
                worker_replicas,
                total_num_replicas=worker_replicas,
                variable_averages=ema,
                variables_to_average=tf.trainable_variables())

            train_op = opt.minimize(loss,
                                    global_step=global_step,
                                    name="train",
                                    colocate_gradients_with_ops=True)

            session_config = tf.ConfigProto(allow_soft_placement=True)

            is_chief = (FLAGS.task == 0)
            # The chief and the other workers use different init ops.
            local_init_op = (opt.chief_init_op
                             if is_chief else opt.local_step_init_op)

            slim.learning.train(
                train_op=train_op,
                logdir=logdir,
                is_chief=is_chief,
                master=FLAGS.master,
                number_of_steps=config.num_iters,
                global_step=global_step,
                log_every_n_steps=250,
                local_init_op=local_init_op,
                save_interval_secs=300,
                sync_optimizer=opt,
                session_config=session_config,
            )
def __init__(self,
             network_name,
             initializer,
             regularizer,
             vocab_size,
             embedding_size,
             n_class,
             batch_size,
             filter_heights,
             num_filters,
             num_units,
             layers=3,
             *args,
             **kwargs):
    """Builds a CNN + bidirectional-LSTM text classification graph.

    For every filter height a convolution is applied to the embedded input
    and its output is fed to a stacked bidirectional LSTM. The forward output
    at the last valid step and the backward output at step 0 are
    concatenated across all filter heights, passed through dropout and a
    fully connected layer, and used to build the loss and accuracy.

    Args:
        network_name: Name of the outer variable scope.
        initializer: Default initializer for that scope.
        regularizer: Default regularizer for that scope.
        vocab_size: Number of rows of the embedding matrix.
        embedding_size: Width of each embedding vector.
        n_class: Number of output classes.
        batch_size: Default value of the `batch_size` placeholder.
        filter_heights: Convolution filter heights, one per branch.
        num_filters: Either one int used for every branch, or a list/tuple
            with one entry per element of `filter_heights`.
        num_units: Number of units of each LSTM cell.
        layers: Number of stacked LSTM cells per direction.

    Raises:
        Exception: If `num_filters` is a sequence whose length differs from
            `filter_heights`, or is neither a sequence nor an int.
    """
    self.network_name = network_name
    self.initializer = initializer
    self.regularizer = regularizer
    self.vocab_size = vocab_size
    self.n_class = n_class
    self.batch_size = batch_size
    self.filter_heights = filter_heights
    if isinstance(num_filters, (list, tuple)):
        # One filter count per filter height.
        if len(self.filter_heights) != len(num_filters):
            raise Exception("filter_heights和num_filters必须长度一致")
        self.num_filters = num_filters
    elif isinstance(num_filters, int):
        # A single count is shared by every filter height.
        self.num_filters = [num_filters for _ in self.filter_heights]
    else:
        raise Exception("参数num_filters只能是list列表或者int类型的数字!!!")
    self.embedding_size = embedding_size
    self.num_units = num_units
    self.layers = layers

    with tf.variable_scope(self.network_name,
                           initializer=self.initializer,
                           regularizer=self.regularizer):
        # 1. Placeholders for input, output, dropout, batch_size
        with tf.variable_scope("placeholders"):
            self.input = tf.placeholder(tf.int32, [None, None],
                                        name='input_x')
            self.output = tf.placeholder(tf.int32, [None], name='input_y')
            self.dropout_keep_prob = tf.placeholder_with_default(
                1.0, shape=[], name='dropout_keep_prob')
            self.batch_size = tf.placeholder_with_default(
                self.batch_size, shape=[], name='batch_size')
            # Sequence length of every sample: the number of non-zero ids
            # (padding uses id 0). [N,T] -> [N,]
            self.lengths = tf.reduce_sum(tf.sign(tf.abs(self.input)),
                                         axis=-1)

        # 1.5 Embedding Layer
        with tf.device('/cpu:0'), tf.name_scope("embedding"):
            # Embedding matrix initialised uniformly in [-1, 1).
            self.embedding = tf.Variable(
                tf.random_uniform([self.vocab_size, self.embedding_size],
                                  -1.0, 1.0),
                name="W")
            # [N, T] -> [N, T, E]
            self.embedded_chars = tf.nn.embedding_lookup(
                self.embedding, self.input)
            # conv2d needs a 4-D input, so append a channel axis:
            # [N, T, E] -> [N, T, E, 1]
            self.embedded_chars_expanded = tf.expand_dims(
                self.embedded_chars, -1)

        # 2. Build CNN + LSTM output
        outputs = []
        num_filters_total = 0
        print(filter_heights, num_filters)
        with tf.variable_scope("cnn-rnn"):
            for idx, filter_height in enumerate(self.filter_heights):
                with tf.variable_scope("conv-%s" % idx):
                    # Convolution Layer
                    num_filters_total += self.num_filters[idx]
                    # The filter spans `filter_height` tokens and the full
                    # embedding width; 1 input channel, num_filters[idx]
                    # output channels.
                    filter_shape = [
                        filter_height, self.embedding_size, 1,
                        self.num_filters[idx]
                    ]
                    print(filter_shape)
                    # Truncated-normal initialisation.
                    W = tf.Variable(tf.truncated_normal(filter_shape,
                                                        stddev=0.01),
                                    name="W")
                    print(W)
                    # One bias per output channel of THIS branch. Using the
                    # raw `num_filters` argument here fails whenever a
                    # per-branch list is passed.
                    b = tf.Variable(tf.constant(
                        0.1, shape=[self.num_filters[idx]]),
                                    name="b")
                    print(b)
                    conv = tf.nn.conv2d(
                        self.embedded_chars_expanded,
                        W,
                        strides=[1, 1, 1, 1],
                        padding="VALID",  # no padding
                        name="conv")
                    # Apply nonlinearity: [N, H, W, C]
                    # N: batch size
                    # H: height after convolution, T - filter_height + 1
                    # W: 1
                    # C: self.num_filters[idx]
                    h = tf.nn.relu(tf.nn.bias_add(conv, b), name="relu")

                with tf.variable_scope("lstm-%s" % idx):
                    # 0. LSTM inputs and lengths. The un-padded convolution
                    # shortens every sequence by filter_height - 1.
                    lengths = self.lengths - filter_height + 1
                    cell_inputs = tf.squeeze(h, axis=2)  # [B,T,1,D] -> [B,T,D]

                    # 1. Build the RNN cells.
                    def cell(units):
                        return tf.nn.rnn_cell.BasicLSTMCell(units)

                    cell_fw = tf.nn.rnn_cell.MultiRNNCell(cells=[
                        cell(self.num_units) for _ in range(self.layers)
                    ])
                    cell_bw = tf.nn.rnn_cell.MultiRNNCell(cells=[
                        cell(self.num_units) for _ in range(self.layers)
                    ])

                    # 2. Build the dynamic bidirectional RNN.
                    (output_fw,
                     output_bw), _ = tf.nn.bidirectional_dynamic_rnn(
                         cell_fw=cell_fw,  # forward cell
                         cell_bw=cell_bw,  # backward cell
                         inputs=cell_inputs,  # [B,T,D]
                         sequence_length=lengths,  # [B,]
                         dtype=cell_inputs.dtype  # dtype of initial state
                     )

                    # 3. Pick the outputs to keep. The backward RNN's useful
                    # summary sits at step 0, not at the last step.
                    batch_size = tf.shape(output_fw)[0]
                    indices_fw = tf.concat(
                        [
                            # Sample index, 0 .. N-1.
                            tf.reshape(tf.range(batch_size), shape=(-1, 1)),
                            # Index of each sample's last valid step.
                            tf.reshape(lengths - 1, shape=(-1, 1))
                        ],
                        axis=-1)
                    indices_bw = tf.concat(
                        [
                            # Sample index, 0 .. N-1.
                            tf.reshape(tf.range(batch_size), shape=(-1, 1)),
                            # Step 0 for every sample.
                            tf.reshape(tf.zeros_like(lengths - 1),
                                       shape=(-1, 1))
                        ],
                        axis=-1)
                    # Gather both summaries and concatenate them.
                    output = tf.concat(
                        (
                            # [B,U]: forward output at the last valid step.
                            tf.gather_nd(output_fw, indices_fw),
                            # [B,U]: backward output at step 0.
                            tf.gather_nd(output_bw, indices_bw)),
                        axis=-1)
                    outputs.append(output)

            # Merge the features of all branches.
            output = tf.concat(outputs, -1)

            # Dropout.
            h_drop = tf.nn.dropout(output, keep_prob=self.dropout_keep_prob)

        # 3. Build FC output
        with tf.variable_scope("fc"):
            in_units = h_drop.get_shape()[-1]
            w = tf.get_variable(name='w', shape=[in_units, self.n_class])
            b = tf.get_variable(name='b', shape=[self.n_class])
            self.scores = tf.nn.xw_plus_b(h_drop,
                                          weights=w,
                                          biases=b,
                                          name='scores')
            self.predictions = tf.argmax(self.scores,
                                         axis=1,
                                         name='predictions')

        # 4. Build Loss
        with tf.variable_scope("loss"):
            self.losses = tf.reduce_mean(
                tf.nn.sparse_softmax_cross_entropy_with_logits(
                    labels=self.output, logits=self.scores))
            tf.losses.add_loss(self.losses)
            self.total_loss = tf.losses.get_total_loss(name='total_loss')
            tf.summary.scalar('total_loss', self.total_loss)
            tf.summary.scalar('loss', self.losses)

        # 5. Build Estimate eval
        with tf.variable_scope("accuracy"):
            correct_predictions = tf.equal(self.predictions,
                                           tf.cast(self.output, tf.int64))
            self.accuracy = tf.reduce_mean(tf.cast(correct_predictions,
                                                   tf.float32),
                                           name='accuracy')
            tf.summary.scalar('accuracy', self.accuracy)
def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """The `model_fn` for TPUEstimator.

    Loads the entity/mention lookup structures, builds the model and wraps
    its predictions in a `TPUEstimatorSpec`. Only PREDICT mode is supported;
    any other mode raises ValueError after the graph has been built.
    """
    tf.logging.info("*** Features ***")
    for name in sorted(features.keys()):
        tf.logging.info(" name = %s, shape = %s", name, features[name].shape)

    # Initialize sparse tensors.
    with tf.device("/cpu:0"):
        ragged = search_utils.load_ragged_matrix("ent2ment", e2m_checkpoint)
        tf_e2m_data, tf_e2m_indices, tf_e2m_rowsplits = ragged
        with tf.name_scope("RaggedConstruction"):
            # Indices and values share the same row partitioning.
            e2m_ragged_ind = tf.RaggedTensor.from_row_splits(
                values=tf_e2m_indices,
                row_splits=tf_e2m_rowsplits,
                validate=False)
            e2m_ragged_val = tf.RaggedTensor.from_row_splits(
                values=tf_e2m_data,
                row_splits=tf_e2m_rowsplits,
                validate=False)

    tf_m2e_map = search_utils.load_database("coref",
                                            [mips_config.num_mentions],
                                            m2e_checkpoint,
                                            dtype=tf.int32)
    entity_table_shape = [qa_config.num_entities, qa_config.max_entity_len]
    entity_ids = search_utils.load_database("entity_ids",
                                            entity_table_shape,
                                            entity_id_checkpoint,
                                            dtype=tf.int32)
    entity_mask = search_utils.load_database("entity_mask",
                                             entity_table_shape,
                                             entity_mask_checkpoint)

    _, predictions = create_model_fn(
        bert_config=bert_config,
        qa_config=qa_config,
        mips_config=mips_config,
        is_training=(mode == tf.estimator.ModeKeys.TRAIN),
        features=features,
        ent2ment_ind=e2m_ragged_ind,
        ent2ment_val=e2m_ragged_val,
        ment2ent_map=tf_m2e_map,
        entity_ids=entity_ids,
        entity_mask=entity_mask,
        use_one_hot_embeddings=use_one_hot_embeddings,
        summary_obj=summary_obj)

    trainable = tf.trainable_variables()

    scaffold_fn = None
    if init_checkpoint:
        assignment_map, _ = get_assignment_map_from_checkpoint(
            trainable, init_checkpoint, load_only_bert=qa_config.load_only_bert)
        if not use_tpu:
            tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
        else:
            # On TPU the checkpoint initialisation is deferred to the
            # scaffold function.
            def tpu_scaffold():
                tf.train.init_from_checkpoint(init_checkpoint,
                                              assignment_map)
                return tf.train.Scaffold()

            scaffold_fn = tpu_scaffold

    if mode != tf.estimator.ModeKeys.PREDICT:
        raise ValueError("Only PREDICT mode is supported: %s" % (mode))

    return contrib_tpu.TPUEstimatorSpec(mode=mode,
                                        predictions=predictions,
                                        scaffold_fn=scaffold_fn)
def __init__(
    self,
    num_unique_documents,
    vocab_size,
    num_topics,
    freqs,
    embedding_size=128,
    num_sampled=40,
    learning_rate=1e-3,
    lmbda=150.0,
    alpha=None,
    power=0.75,
    batch_size=32,
    clip_gradients=5.0,
    **kwargs
):
    """Builds the graph and session for a word + document-topic model.

    The context vector is the sum of the pivot word embedding and a document
    vector (document-topic weights times the topic embeddings). It is trained
    with an NCE loss, plus a prior term on the log-softmax of the document
    weights scaled by `lmbda` and `self.fraction`.

    The training op increments `global_step`. The condition
    `global_step < self.switch_loss`, which selects the NCE-only loss,
    depends on that; without it the step stays at 0 forever.

    Args:
        num_unique_documents: Number of rows of the document embedding.
        vocab_size: Vocabulary size.
        num_topics: Number of topics.
        freqs: Unigram counts passed to the candidate sampler.
        embedding_size: Width of word and topic embeddings.
        num_sampled: Number of sampled negatives for the NCE loss.
        learning_rate: Adam learning rate.
        lmbda: Weight of the prior term.
        alpha: Prior concentration; defaults to 1 / num_topics.
        power: Distortion applied to the unigram distribution.
        batch_size: Stored on the instance as `self.batch_size`.
        clip_gradients: Gradients are clipped to [-value, value].
        **kwargs: Forwarded to `get_device` and `generate_session`.
    """
    device = get_device(**kwargs)
    _graph = tf.Graph()

    with _graph.as_default():
        with tf.device(device):
            moving_avgs = tf.train.ExponentialMovingAverage(0.9)
            self.batch_size = batch_size
            self.freqs = freqs

            self.X = tf.placeholder(tf.int32, shape=[None])
            self.Y = tf.placeholder(tf.int64, shape=[None])
            self.DOC = tf.placeholder(tf.int32, shape=[None])
            # While global_step is below this value only the NCE loss is
            # optimised.
            self.switch_loss = tf.Variable(0, trainable=False)

            train_labels = tf.reshape(self.Y, [-1, 1])
            sampler = tf.nn.fixed_unigram_candidate_sampler(
                train_labels,
                num_true=1,
                num_sampled=num_sampled,
                unique=True,
                range_max=vocab_size,
                distortion=power,
                unigrams=self.freqs,
            )

            self.word_embedding = tf.Variable(
                tf.random_uniform([vocab_size, embedding_size], -1.0, 1.0)
            )
            self.nce_weights = tf.Variable(
                tf.truncated_normal(
                    [vocab_size, embedding_size],
                    stddev=tf.sqrt(1 / embedding_size),
                )
            )
            self.nce_biases = tf.Variable(tf.zeros([vocab_size]))

            scalar = 1 / np.sqrt(num_unique_documents + num_topics)
            self.doc_embedding = tf.Variable(
                tf.random_normal(
                    [num_unique_documents, num_topics],
                    mean=0,
                    stddev=50 * scalar,
                )
            )
            self.topic_embedding = tf.get_variable(
                'topic_embedding',
                shape=[num_topics, embedding_size],
                dtype=tf.float32,
                initializer=tf.orthogonal_initializer(gain=scalar),
            )

            # Context = pivot word vector + document vector.
            pivot = tf.nn.embedding_lookup(self.word_embedding, self.X)
            proportions = tf.nn.embedding_lookup(
                self.doc_embedding, self.DOC
            )
            doc = tf.matmul(proportions, self.topic_embedding)
            context = tf.add(pivot, doc)

            loss_word2vec = tf.reduce_mean(
                tf.nn.nce_loss(
                    weights=self.nce_weights,
                    biases=self.nce_biases,
                    labels=self.Y,
                    inputs=context,
                    num_sampled=num_sampled,
                    num_classes=vocab_size,
                    num_true=1,
                    sampled_values=sampler,
                )
            )

            self.fraction = tf.Variable(
                1, trainable=False, dtype=tf.float32
            )

            # Prior term over the log-softmax of the document weights.
            n_topics = self.doc_embedding.get_shape()[1].value
            log_proportions = tf.nn.log_softmax(self.doc_embedding)
            if alpha is None:
                alpha = 1.0 / n_topics
            loss = (alpha - 1) * log_proportions
            prior = tf.reduce_sum(loss)
            loss_lda = lmbda * self.fraction * prior

            global_step = tf.Variable(
                0, trainable=False, name='global_step'
            )
            self.cost = tf.cond(
                global_step < self.switch_loss,
                lambda: loss_word2vec,
                lambda: loss_word2vec + loss_lda,
            )

            loss_avgs_op = moving_avgs.apply(
                [loss_lda, loss_word2vec, self.cost]
            )
            with tf.control_dependencies([loss_avgs_op]):
                optimizer = tf.train.AdamOptimizer(
                    learning_rate=learning_rate
                )
                gvs = optimizer.compute_gradients(self.cost)
                # Skip variables without a gradient; clip_by_value cannot
                # handle None.
                capped_gvs = [
                    (
                        tf.clip_by_value(
                            grad, -clip_gradients, clip_gradients
                        ),
                        var,
                    )
                    for grad, var in gvs
                    if grad is not None
                ]
                # Passing global_step makes every training step increment
                # it, which is what lets the switch_loss condition change.
                self.optimizer = optimizer.apply_gradients(
                    capped_gvs, global_step=global_step
                )

        self.sess = generate_session(_graph, **kwargs)
        self.sess.run(tf.global_variables_initializer())
def main(unused_argv=None):
    """Builds the multi-style transfer training graph and runs training.

    Content and style images are read on the CPU, the transform network is
    conditioned on the style labels, and the combined content/style loss is
    minimised with Adam. VGG-16 weights are loaded from a checkpoint before
    training starts.

    Raises:
        ValueError: If the number of style coefficients differs from
            `FLAGS.num_styles`.
    """
    with tf.Graph().as_default():
        # Force all input processing onto CPU in order to reserve the GPU
        # for the forward inference and back-propagation.
        input_device = '/job:worker/cpu:0' if FLAGS.ps_tasks else '/cpu:0'
        with tf.device(
                tf.train.replica_device_setter(FLAGS.ps_tasks,
                                               worker_device=input_device)):
            inputs, _ = image_utils.imagenet_inputs(FLAGS.batch_size,
                                                    FLAGS.image_size)
            # Load style images and select one at random (for each graph
            # execution, a new random selection occurs)
            style_inputs = image_utils.style_image_inputs(
                os.path.expanduser(FLAGS.style_dataset_file),
                batch_size=FLAGS.batch_size,
                image_size=FLAGS.image_size,
                square_crop=True,
                shuffle=True)
            style_images, style_labels, style_gram_matrices = style_inputs

        with tf.device(tf.train.replica_device_setter(FLAGS.ps_tasks)):
            # Process style and weight flags
            num_styles = FLAGS.num_styles
            if FLAGS.style_coefficients is not None:
                style_coefficients = ast.literal_eval(
                    FLAGS.style_coefficients)
            else:
                style_coefficients = [1.0] * num_styles
            if len(style_coefficients) != num_styles:
                raise ValueError(
                    'number of style coefficients differs from number of styles'
                )
            content_weights = ast.literal_eval(FLAGS.content_weights)
            style_weights = ast.literal_eval(FLAGS.style_weights)

            # Rescale style weights dynamically based on the current style
            # image
            style_coefficient = tf.gather(tf.constant(style_coefficients),
                                          style_labels)
            style_weights = {
                key: style_coefficient * style_weights[key]
                for key in style_weights
            }

            # Define the model
            normalizer_params = {
                'labels': style_labels,
                'num_categories': num_styles,
                'center': True,
                'scale': True
            }
            stylized_inputs = model.transform(
                inputs,
                alpha=FLAGS.alpha,
                normalizer_params=normalizer_params)

            # Compute losses.
            total_loss, loss_dict = learning.total_loss(
                inputs, stylized_inputs, style_gram_matrices,
                content_weights, style_weights)
            for loss_name, loss_value in loss_dict.items():
                tf.summary.scalar(loss_name, loss_value)

            # Adding Image summaries to the tensorboard.
            tf.summary.image('image/0_inputs', inputs, 3)
            tf.summary.image('image/1_styles', style_images, 3)
            tf.summary.image('image/2_styled_inputs', stylized_inputs, 3)

            # Set up training
            optimizer = tf.train.AdamOptimizer(FLAGS.learning_rate)
            train_op = slim.learning.create_train_op(
                total_loss,
                optimizer,
                clip_gradient_norm=FLAGS.clip_gradient_norm,
                summarize_gradients=False)

            # Function to restore VGG16 parameters.
            vgg_variables = slim.get_variables('vgg_16')
            init_fn_vgg = slim.assign_from_checkpoint_fn(
                vgg.checkpoint_file(), vgg_variables)

            # Run training
            slim.learning.train(
                train_op=train_op,
                logdir=os.path.expanduser(FLAGS.train_dir),
                master=FLAGS.master,
                is_chief=FLAGS.task == 0,
                number_of_steps=FLAGS.train_steps,
                init_fn=init_fn_vgg,
                save_summaries_secs=FLAGS.save_summaries_secs,
                save_interval_secs=FLAGS.save_interval_secs)
def train():
    """Builds the per-point classification graph and trains for MAX_EPOCH epochs.

    Each epoch runs `train_one_epoch` followed by `eval_one_epoch`; the model
    is checkpointed to LOG_DIR every 10th epoch (starting with epoch 0). The
    session and both summary writers are closed when training ends or fails.

    Raises:
        ValueError: If OPTIMIZER is neither 'momentum' nor 'adam'.
    """
    with tf.Graph().as_default():
        with tf.device('/gpu:' + str(GPU_INDEX)):
            pointclouds_pl, labels_pl = placeholder_inputs(
                BATCH_SIZE, NUM_POINT)
            is_training_pl = tf.placeholder(tf.bool, shape=())

            # Note the global_step=batch parameter to minimize.
            # That tells the optimizer to helpfully increment the 'batch'
            # parameter for you every time it trains.
            batch = tf.Variable(0)
            bn_decay = get_bn_decay(batch)
            tf.summary.scalar('bn_decay', bn_decay)

            # Get model and loss
            pred = get_model(pointclouds_pl, is_training_pl,
                             bn_decay=bn_decay)
            loss = get_loss(pred, labels_pl)
            tf.summary.scalar('loss', loss)

            # Fraction of correctly labelled points in the batch.
            correct = tf.equal(tf.argmax(pred, 2), tf.to_int64(labels_pl))
            accuracy = tf.reduce_sum(tf.cast(correct, tf.float32)) / float(
                BATCH_SIZE * NUM_POINT)
            tf.summary.scalar('accuracy', accuracy)

            # Get training operator
            learning_rate = get_learning_rate(batch)
            tf.summary.scalar('learning_rate', learning_rate)
            if OPTIMIZER == 'momentum':
                optimizer = tf.train.MomentumOptimizer(learning_rate,
                                                       momentum=MOMENTUM)
            elif OPTIMIZER == 'adam':
                optimizer = tf.train.AdamOptimizer(learning_rate)
            else:
                # Fail with a clear message instead of a NameError on the
                # next line.
                raise ValueError('Unknown OPTIMIZER: %r' % (OPTIMIZER,))
            train_op = optimizer.minimize(loss, global_step=batch)

            # Add ops to save and restore all the variables.
            saver = tf.train.Saver()

        # Create a session
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        config.allow_soft_placement = True
        config.log_device_placement = True

        # The context manager closes the session when training finishes or
        # raises.
        with tf.Session(config=config) as sess:
            # Add summary writers
            merged = tf.summary.merge_all()
            train_writer = tf.summary.FileWriter(
                os.path.join(LOG_DIR, 'train'), sess.graph)
            test_writer = tf.summary.FileWriter(os.path.join(LOG_DIR, 'test'))

            try:
                # Init variables
                init = tf.global_variables_initializer()
                sess.run(init, {is_training_pl: True})

                ops = {
                    'pointclouds_pl': pointclouds_pl,
                    'labels_pl': labels_pl,
                    'is_training_pl': is_training_pl,
                    'pred': pred,
                    'loss': loss,
                    'train_op': train_op,
                    'merged': merged,
                    'step': batch
                }

                for epoch in range(MAX_EPOCH):
                    log_string('**** EPOCH %03d ****' % (epoch))
                    sys.stdout.flush()

                    train_one_epoch(sess, ops, train_writer)
                    eval_one_epoch(sess, ops, test_writer)

                    # Save the variables to disk.
                    if epoch % 10 == 0:
                        save_path = saver.save(
                            sess, os.path.join(LOG_DIR, "model.ckpt"))
                        log_string("Model saved in file: %s" % save_path)
            finally:
                # Flush pending summaries and release the event files.
                train_writer.close()
                test_writer.close()
def train_a_model(input_seq,
                  mask_seq,
                  label_seq,
                  vocab_size,
                  d_model,
                  head,
                  init_weights,
                  print_output=False):
    """Train a freshly built model for LOCAL_TRAIN_EPOCH epochs.

    The default graph is reset, the model and its training graph are rebuilt,
    and every (input, mask, label) batch is fed once per epoch.

    Args:
        input_seq: non-empty sequence of input batches; the batch size and
            sequence length are taken from ``input_seq[0]``.
        mask_seq: sequence of mask batches, iterated in step with input_seq.
        label_seq: sequence of label batches, iterated in step with input_seq.
        vocab_size: forwarded to ``build_model``.
        d_model: forwarded to ``build_model`` and ``build_train_graph``.
        head: forwarded to ``build_model``.
        init_weights: if not None, loaded into the model with
            ``set_all_variables`` after variable initialisation.
        print_output: when True, print the epoch index after each epoch and
            a summary of the last epoch once training finishes.

    Returns:
        A list ``[avg_loss, avg_disagreement_loss, avg_classification_loss,
        avg_accuracy, trained_weights]``. The averages are per-batch means
        over the last epoch (0.0 if no epoch ran).
    """
    # Clear everything in the default graph so we can start fresh.
    tf.reset_default_graph()

    with tf.device(USED_DEVICE):
        # The graph-level seed is drawn from Python's `random` module, so a
        # run is repeatable only if the caller seeds `random` beforehand.
        tf.set_random_seed(random.randint(0, 65535))

        batch_size = len(input_seq[0])
        seq_len = len(input_seq[0][0])
        num_batches = len(input_seq)

        sess = setup_tensorflow_session()
        try:
            (input_tensor, mask_tensor, output_tensor, disagreement_cost,
             logprob_tensor) = build_model(batch=batch_size,
                                           seq_len=seq_len,
                                           vocab_size=vocab_size,
                                           d_model=d_model,
                                           head=head)
            (label_tensor, train_op, loss,
             classification_loss) = build_train_graph(
                 output_tensor=output_tensor,
                 batch=batch_size,
                 seq_len=seq_len,
                 d_model=d_model,
                 additional_costs=[disagreement_cost])

            sess.run(tf.global_variables_initializer())
            if init_weights is not None:
                set_all_variables(sess, init_weights)

            # Bound up front so the return value is well defined even when
            # LOCAL_TRAIN_EPOCH is 0.
            avg_loss = 0.0
            avg_disagreement_loss = 0.0
            avg_classification_loss = 0.0
            avg_accuracy = 0.0
            output_vals = None

            fetches = [
                output_tensor, loss, disagreement_cost, classification_loss,
                logprob_tensor, train_op
            ]

            for i in range(LOCAL_TRAIN_EPOCH):
                # Running sums for this epoch, turned into means below.
                avg_loss = 0.0
                avg_disagreement_loss = 0.0
                avg_classification_loss = 0.0
                avg_accuracy = 0.0

                for input_sample, mask_sample, label_sample in zip(
                        input_seq, mask_seq, label_seq):
                    (output_vals, loss_vals, disagreement_cost_vals,
                     classification_loss_vals, logprob_vals, _) = sess.run(
                         fetches,
                         feed_dict={
                             input_tensor: input_sample,
                             mask_tensor: mask_sample,
                             label_tensor: label_sample
                         })

                    avg_loss += loss_vals
                    avg_disagreement_loss += disagreement_cost_vals
                    avg_classification_loss += classification_loss_vals

                    # Batch accuracy: threshold the model output at 0.5 and
                    # compare element-wise with the labels.
                    # NOTE(review): despite the name, `logprob_vals` is
                    # treated as a probability here - confirm.
                    labels = np.array(label_sample)
                    predictions = (logprob_vals >= 0.5).astype(int)
                    scores = (predictions == labels).astype(int)
                    avg_accuracy += np.average(scores)

                avg_loss = avg_loss / num_batches
                avg_disagreement_loss = avg_disagreement_loss / num_batches
                avg_classification_loss = (avg_classification_loss /
                                           num_batches)
                avg_accuracy = avg_accuracy / num_batches

                if print_output:
                    print('EPOCH: ' + str(i))

            if print_output:
                print('=== Input Values ===')
                print(input_seq)
                print('=== Label Values ===')
                print(label_seq)
                print('=== Output Values ===')
                print(output_vals)
                print('=== Loss Values ===')
                print(avg_loss)
                print('=== Classification Loss Values ===')
                print(avg_classification_loss)
                print('=== Disagreement Loss Values ===')
                print(avg_disagreement_loss)
                print('=== Accuracy ===')
                print(avg_accuracy)

            # Extract the weights while the session is still open.
            trained_weights = get_all_variables(sess)
        finally:
            # Close the session so repeated calls do not accumulate open
            # sessions across graph resets.
            sess.close()

    return [
        avg_loss, avg_disagreement_loss, avg_classification_loss,
        avg_accuracy, trained_weights
    ]