Code Example #1
File: events.py Project: mykolivy/master_thesis
def train(args):
	"""Trains the model."""

	if args.verbose:
		tf.logging.set_verbosity(tf.logging.INFO)

	# Create input data pipeline.
	with tf.device("/cpu:0"):
		train_files = glob.glob(args.train_glob)[:3]
		if not train_files:
			raise RuntimeError("No training images found with glob '{}'.".format(
			    args.train_glob))
		train_dataset = tf.data.TextLineDataset(
		    train_files,
		    compression_type=None,
		    buffer_size=len(train_files),
		    num_parallel_reads=args.preprocess_threads)
		train_dataset = train_dataset.map(
		    string_to_tensor, num_parallel_calls=args.preprocess_threads)
		train_dataset = train_dataset.shuffle(buffer_size=len(train_files)).repeat()
		train_dataset = train_dataset.batch(args.batchsize)
		train_dataset = train_dataset.prefetch(32)

	num_pixels = args.batchsize * 128

	# Get training patch from dataset.
	x = train_dataset.make_one_shot_iterator().get_next()

	# Instantiate model.
	analysis_transform = AnalysisTransform(32)
	entropy_bottleneck = tfc.EntropyBottleneck()
	synthesis_transform = SynthesisTransform(32)

	# Build autoencoder.
	y = analysis_transform(x)
	y_tilde, likelihoods = entropy_bottleneck(y, training=True)
	x_tilde = synthesis_transform(y_tilde)
	timestamps, polarities = tf.split(x_tilde, num_or_size_splits=2, axis=-1)
	timestamps = tf.math.abs(timestamps)
	polarities = tf.math.tanh(polarities)
	x_tilde = tf.concat([timestamps, polarities], axis=-1)

	train_bpp = tf.reduce_mean(
	    -tf.reduce_sum(likelihoods * tf.log(likelihoods), axis=[1, 2]) /
	    np.log(2))

	# Mean squared error across pixels.
	train_mse = tf.reduce_mean((x - x_tilde)**2.)

	# The rate-distortion cost.
	train_loss = args.lmbda * train_mse + train_bpp

	# Minimize loss and auxiliary loss, and execute update op.
	step = tf.train.create_global_step()
	main_optimizer = tf.train.AdamOptimizer(learning_rate=1e-4)
	main_step = main_optimizer.minimize(train_loss, global_step=step)

	aux_optimizer = tf.train.AdamOptimizer(learning_rate=1e-3)
	aux_step = aux_optimizer.minimize(entropy_bottleneck.losses[0])

	train_op = tf.group(main_step, aux_step, entropy_bottleneck.updates[0])

	tf.summary.scalar("loss", train_loss)
	tf.summary.scalar("bpp", train_bpp)
	tf.summary.scalar("mse", train_mse)

	hooks = [
	    tf.train.StopAtStepHook(last_step=args.last_step),
	    tf.train.NanTensorHook(train_loss),
	]
	with tf.train.MonitoredTrainingSession(hooks=hooks,
	                                       checkpoint_dir=args.checkpoint_dir,
	                                       save_checkpoint_secs=300,
	                                       save_summaries_secs=60) as sess:
		while not sess.should_stop():
			sess.run(train_op)
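
For context, a minimal sketch of how train() might be invoked. The flag names below are inferred from the attributes the function reads (train_glob, batchsize, lmbda, ...) and the defaults are placeholders, so they may differ from the project's actual CLI.

import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--verbose", action="store_true")
parser.add_argument("--train_glob", default="data/events/*.txt")  # hypothetical default path
parser.add_argument("--preprocess_threads", type=int, default=4)
parser.add_argument("--batchsize", type=int, default=8)
parser.add_argument("--lmbda", type=float, default=0.01)
parser.add_argument("--last_step", type=int, default=100000)
parser.add_argument("--checkpoint_dir", default="train_ckpt")
args = parser.parse_args()
train(args)
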
Code Example #2
File: image_utils.py Project: wangeen/magenta
def arbitrary_style_image_inputs(style_dataset_file,
                                 batch_size=None,
                                 image_size=None,
                                 center_crop=True,
                                 shuffle=True,
                                 augment_style_images=False,
                                 random_style_image_size=False,
                                 min_rand_image_size=128,
                                 max_rand_image_size=300):
    """Loads a batch of random style image given the path of tfrecord dataset.

  This method does not return pre-compute Gram matrices for the images like
  style_image_inputs. But it can provide data augmentation. If
  augment_style_images is equal to True, then style images will randomly
  modified (eg. changes in brightness, hue or saturation) for data
  augmentation. If random_style_image_size is set to True then all images
  in one batch will be resized to a random size.
  Args:
    style_dataset_file: str, path to the tfrecord dataset of style files.
    batch_size: int. If provided, batches style images. Defaults to None.
    image_size: int. The images will be resized bilinearly so that the smallest
        side has size image_size. Defaults to None.
    center_crop: bool. If True, center-crops to [image_size, image_size].
        Defaults to True.
    shuffle: bool, whether to shuffle style files at random. Defaults to True.
    augment_style_images: bool. Whether to augment style images or not.
    random_style_image_size: bool. If this value is True, then all the style
        images in one batch will be resized to a random size between
        min_rand_image_size and max_rand_image_size.
    min_rand_image_size: int. If random_style_image_size is True, this value
        specifies the minimum image size.
    max_rand_image_size: int. If random_style_image_size is True, this value
        specifies the maximum image size.

  Returns:
    4-D tensor of shape [1, ?, ?, 3] with values in [0, 1] for the style
    image (with random changes for data augmentation if
    augment_style_images is set to True), and 0-D tensor for the style
    label, 4-D tensor of shape [1, ?, ?, 3] with values in [0, 1] for the style
    image without random changes for data augmentation.

  Raises:
    ValueError: if center cropping is requested but no image size is provided,
        or if batch size is specified but center-cropping or
        augment-style-images is not requested,
        or if both augment-style-images and center-cropping are requested.
  """
    if center_crop and image_size is None:
        raise ValueError('center-cropping requires specifying the image size.')
    if center_crop and augment_style_images:
        raise ValueError(
            'When augment_style_images is true images will be randomly cropped.'
        )
    if batch_size is not None and not center_crop and not augment_style_images:
        raise ValueError(
            'batching requires same image sizes (Set center-cropping or '
            'augment_style_images to true)')

    with tf.name_scope('style_image_processing'):
        # Force all input processing onto CPU in order to reserve the GPU for the
        # forward inference and back-propagation.
        with tf.device('/cpu:0'):
            filename_queue = tf.train.string_input_producer(
                [style_dataset_file],
                shuffle=False,
                capacity=1,
                name='filename_queue')
            if shuffle:
                examples_queue = tf.RandomShuffleQueue(
                    capacity=64,
                    min_after_dequeue=32,
                    dtypes=[tf.string],
                    name='random_examples_queue')
            else:
                examples_queue = tf.FIFOQueue(capacity=64,
                                              dtypes=[tf.string],
                                              name='fifo_examples_queue')
            reader = tf.TFRecordReader()
            _, value = reader.read(filename_queue)
            enqueue_ops = [examples_queue.enqueue([value])]
            tf.train.queue_runner.add_queue_runner(
                tf.train.queue_runner.QueueRunner(examples_queue, enqueue_ops))
            example_serialized = examples_queue.dequeue()
            features = tf.parse_single_example(
                example_serialized,
                features={
                    'label': tf.FixedLenFeature([], tf.int64),
                    'image_raw': tf.FixedLenFeature([], tf.string)
                })
            image = tf.image.decode_jpeg(features['image_raw'])
            image.set_shape([None, None, 3])
            label = features['label']

            if image_size is not None:
                image_channels = int(image.shape[2])
                if augment_style_images:
                    image_orig = image
                    image = tf.image.random_brightness(image, max_delta=0.8)
                    image = tf.image.random_saturation(image,
                                                       lower=0.5,
                                                       upper=1.5)
                    image = tf.image.random_hue(image, max_delta=0.2)
                    image = tf.image.random_flip_left_right(image)
                    image = tf.image.random_flip_up_down(image)
                    random_larger_image_size = tf.random_uniform(
                        [],
                        minval=image_size + 2,
                        maxval=image_size + 200,
                        dtype=tf.int32)
                    image = _aspect_preserving_resize(
                        image, random_larger_image_size)
                    image = tf.random_crop(
                        image, size=[image_size, image_size, image_channels])
                    image.set_shape([image_size, image_size, image_channels])

                    image_orig = _aspect_preserving_resize(
                        image_orig, image_size + 2)
                    image_orig = _central_crop([image_orig], image_size,
                                               image_size)[0]
                    image_orig.set_shape([image_size, image_size, 3])
                elif center_crop:
                    image = _aspect_preserving_resize(image, image_size + 2)
                    image = _central_crop([image], image_size, image_size)[0]
                    image.set_shape([image_size, image_size, image_channels])
                    image_orig = image
                else:
                    image = _aspect_preserving_resize(image, image_size)
                    image_orig = image

            image = tf.to_float(image) / 255.0
            image_orig = tf.to_float(image_orig) / 255.0

            if batch_size is None:
                image = tf.expand_dims(image, 0)
            else:
                [image, image_orig,
                 label] = tf.train.batch([image, image_orig, label],
                                         batch_size=batch_size)

            if random_style_image_size:
                # Selects a random size for the style images and resizes all the images
                # in the batch to that size.
                image = _aspect_preserving_resize(
                    image,
                    tf.random_uniform([],
                                      minval=min_rand_image_size,
                                      maxval=max_rand_image_size,
                                      dtype=tf.int32))

            return image, label, image_orig
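
A hedged usage sketch (TF1 graph mode): the tfrecord path and sizes below are placeholders, and the queue runners created by the function must be started before the tensors can be evaluated.

style_img, style_label, style_img_orig = arbitrary_style_image_inputs(
    'style_images.tfrecord',  # hypothetical dataset path
    batch_size=4,
    image_size=256,
    center_crop=True,
    shuffle=True)

with tf.Session() as sess:
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    images, labels = sess.run([style_img, style_label])  # images: [4, 256, 256, 3] in [0, 1]
    coord.request_stop()
    coord.join(threads)
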
Code Example #3
    def _load_model(self):
        """
        Define and instantiate the computation graph.
        """
        import tensorflow.compat.v1 as tf1
        from lingvo import model_registry, model_imports
        from lingvo.core import cluster_factory

        from asr.librispeech import Librispeech960Wpm

        # check and download patched Lingvo ASR decoder
        _ = self._check_and_download_file(
            self._LINGVO_CFG["decoder"]["uri"], self._LINGVO_CFG["decoder"]["basename"], self._LINGVO_CFG["path"], "asr"
        )

        # monkey-patch the lingvo.tasks.asr.decoder.AsrDecoderBase._ComputeMetrics method with the patched
        # method according to Qin et al.
        import lingvo.tasks.asr.decoder as decoder
        import asr.decoder_patched as decoder_patched

        decoder.AsrDecoderBase._ComputeMetrics = decoder_patched.AsrDecoderBase._ComputeMetrics

        # check and download Lingvo ASR vocab
        # vocab_path = self._check_and_download_vocab()
        vocab_path = self._check_and_download_file(
            self._LINGVO_CFG["vocab"]["uri"], self._LINGVO_CFG["vocab"]["basename"], self._LINGVO_CFG["path"], "asr"
        )

        # monkey-patch the asr.librispeech.Librispeech960Wpm class attribute WPM_SYMBOL_TABLE_FILEPATH
        Librispeech960Wpm.WPM_SYMBOL_TABLE_FILEPATH = vocab_path

        # register model params
        model_name = "asr.librispeech.Librispeech960Wpm"
        model_imports.ImportParams(model_name)
        params = model_registry._ModelRegistryHelper.GetParams(model_name, "Test")

        # set random seed parameter
        if self.random_seed is not None:
            params.random_seed = self.random_seed

        # instantiate Lingvo ASR model
        cluster = cluster_factory.Cluster(params.cluster)
        with cluster, tf1.device(cluster.GetPlacer()):
            model = params.Instantiate()
            task = model.GetTask()

        # load Qin et al. pretrained model
        _ = self._check_and_download_file(
            self._LINGVO_CFG["model_data"]["uri"],
            self._LINGVO_CFG["model_data"]["basename"],
            self._LINGVO_CFG["path"],
            "asr",
            "model",
        )
        model_index_path = self._check_and_download_file(
            self._LINGVO_CFG["model_index"]["uri"],
            self._LINGVO_CFG["model_index"]["basename"],
            self._LINGVO_CFG["path"],
            "asr",
            "model",
        )
        self._sess.run(tf1.global_variables_initializer())
        saver = tf1.train.Saver([var for var in tf1.global_variables() if var.name.startswith("librispeech")])
        saver.restore(self._sess, os.path.splitext(model_index_path)[0])

        # set 'enable_asserts'-flag to False (Note: this flag ensures correct GPU support)
        tf1.flags.FLAGS.enable_asserts = False

        return model, task, cluster
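
The monkey-patching steps above rebind attributes on imported classes at runtime. A toy illustration of the same idiom, unrelated to Lingvo itself:

class Decoder:
    def compute_metrics(self):
        return "original metrics"

def patched_compute_metrics(self):
    return "patched metrics"

# Rebinding the attribute on the class affects all existing and future instances.
Decoder.compute_metrics = patched_compute_metrics
assert Decoder().compute_metrics() == "patched metrics"
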
Code Example #4
File: zip_test_utils.py Project: chrisvon62/AiBot
            def build_example(label, param_dict_real, zip_path_label):
                """Build the model with parameter values set in param_dict_real.

        Args:
          label: Label of the model
          param_dict_real: Parameter dictionary (arguments to the factories
            make_graph and make_test_inputs)
          zip_path_label: Filename in the zip

        Returns:
          (tflite_model_binary, report) where tflite_model_binary is the
          serialized flatbuffer as a string and report is a dictionary with
          keys `toco_log` (log of toco conversion), `tf_log` (log of tf
          conversion), `toco` (a string with the success status of the
          conversion), and `tf` (a string with the success status of the
          conversion).
        """

                np.random.seed(RANDOM_SEED)
                report = {"toco": report_lib.NOTRUN, "tf": report_lib.FAILED}

                # Build graph
                report["tf_log"] = ""
                report["toco_log"] = ""
                tf.reset_default_graph()

                with tf.Graph().as_default():
                    with tf.device("/cpu:0"):
                        try:
                            inputs, outputs = make_graph(param_dict_real)
                        except (tf.errors.UnimplementedError,
                                tf.errors.InvalidArgumentError, ValueError):
                            report["tf_log"] += traceback.format_exc()
                            return None, report

                    sess = tf.Session()
                    try:
                        baseline_inputs, baseline_outputs = (make_test_inputs(
                            param_dict_real, sess, inputs, outputs))
                    except (tf.errors.UnimplementedError,
                            tf.errors.InvalidArgumentError, ValueError):
                        report["tf_log"] += traceback.format_exc()
                        return None, report
                    report["toco"] = report_lib.FAILED
                    report["tf"] = report_lib.SUCCESS
                    # Convert graph to toco
                    input_tensors = [(input_tensor.name.split(":")[0],
                                      input_tensor.shape, input_tensor.dtype)
                                     for input_tensor in inputs]
                    output_tensors = [
                        _normalize_output_name(out.name) for out in outputs
                    ]
                    # pylint: disable=g-long-ternary
                    graph_def = freeze_graph(
                        sess,
                        tf.global_variables() + inputs +
                        outputs) if use_frozen_graph else sess.graph_def

                if "split_tflite_lstm_inputs" in param_dict_real:
                    extra_toco_options.split_tflite_lstm_inputs = param_dict_real[
                        "split_tflite_lstm_inputs"]
                tflite_model_binary, toco_log = options.tflite_convert_function(
                    options,
                    graph_def,
                    input_tensors,
                    output_tensors,
                    extra_toco_options=extra_toco_options,
                    test_params=param_dict_real)
                report["toco"] = (report_lib.SUCCESS if tflite_model_binary
                                  is not None else report_lib.FAILED)
                report["toco_log"] = toco_log

                if options.save_graphdefs:
                    archive.writestr(zip_path_label + ".pbtxt",
                                     text_format.MessageToString(graph_def),
                                     zipfile.ZIP_DEFLATED)

                if tflite_model_binary:
                    if options.make_edgetpu_tests:
                        # Set proper min max values according to input dtype.
                        baseline_inputs, baseline_outputs = generate_inputs_outputs(
                            tflite_model_binary, min_value=0, max_value=255)
                    archive.writestr(zip_path_label + ".bin",
                                     tflite_model_binary, zipfile.ZIP_DEFLATED)
                    example = {
                        "inputs": baseline_inputs,
                        "outputs": baseline_outputs
                    }

                    example_fp = StringIO()
                    write_examples(example_fp, [example])
                    archive.writestr(zip_path_label + ".inputs",
                                     example_fp.getvalue(),
                                     zipfile.ZIP_DEFLATED)

                    example_fp2 = StringIO()
                    write_test_cases(example_fp2, zip_path_label + ".bin",
                                     [example])
                    archive.writestr(zip_path_label + "_tests.txt",
                                     example_fp2.getvalue(),
                                     zipfile.ZIP_DEFLATED)

                    zip_manifest_label = zip_path_label + " " + label
                    if zip_path_label == label:
                        zip_manifest_label = zip_path_label

                    zip_manifest.append(zip_manifest_label + "\n")

                return tflite_model_binary, report
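
A hedged sketch of the report dictionary's lifecycle described in the docstring; the status strings below are stand-ins for the report_lib constants used above.

NOTRUN, FAILED, SUCCESS = "notrun", "failed", "success"  # assumed stand-ins
report = {"toco": NOTRUN, "tf": FAILED, "tf_log": "", "toco_log": ""}
# ...after the TF graph builds and the test inputs are generated successfully:
report["tf"] = SUCCESS
report["toco"] = FAILED
# ...after tflite_convert_function returns a non-None model binary:
report["toco"] = SUCCESS
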
Code Example #5
def train_note_values_conv_net(test_data_arr, test_data_label, train_data_arr,
                               train_data_label):
    """
    This function trains the convolutional network for recognizing note values based on input data.
    Tutorial for this code can be found here:
    https://colab.research.google.com/github/tensorflow/docs/blob/master/site/en/tutorials/keras/classification.ipynb
    The results are saved on a disk so that they can be used without retraining the network.
    :param train_data_label: Labels with names and durations for the train data images.
    :param train_data_arr: Array containing the train images.
    :param test_data_label: Labels with names and durations for the test data images.
    :param test_data_arr: Array containing the test images.
    """

    gpus = tf.config.experimental.list_physical_devices('GPU')
    tf.config.experimental.set_memory_growth(gpus[0], True)
    os.environ[
        'TF_CPP_MIN_LOG_LEVEL'] = '2'  # Alleged fix for some tensorflow bugs.

    construct_output(
        indent_level=0,
        message="Convolutional Network 1 (Note value determining).")

    # Scale these values to a range of 0 to 1 before feeding them to the convolutional network model
    print("Scaling test values to [0-1] range.")
    test_data_arr = test_data_arr / 255.0
    print("Scaling train values to [0-1] range (this will take a while).")
    train_data_arr = train_data_arr / 255.0

    # Construct the path for saving the results of training.
    saved_model_values_path = os.path.abspath(
        os.path.join(str(Path(__file__).parent.parent.parent), 'resources'))
    saved_model_values_path = os.path.join(saved_model_values_path,
                                           'saved_models')
    saved_model_name = "value_processing_net_saved.ckpt"
    saved_model_values_path = os.path.join(saved_model_values_path,
                                           saved_model_name)
    values_model_cb = tf.keras.callbacks.ModelCheckpoint(
        filepath=saved_model_values_path, save_weights_only=True, verbose=1)

    # First network only recognizes the values. No need to feed it unrecognized elements (elements with no value).
    value_network_train_data_arr = np.array([
        x for i, x in enumerate(train_data_arr)
        if train_data_label[i][0][0] != "Uncategorized"
    ])
    value_network_train_data_label = np.array([(x[0][0], x[1])
                                               for x in train_data_label
                                               if x[0][0] != "Uncategorized"])

    value_network_test_data_arr = np.array([
        x for i, x in enumerate(test_data_arr)
        if test_data_label[i][0][0] != "Uncategorized"
    ])
    value_network_test_data_label = np.array([(x[0][0], x[1])
                                              for x in test_data_label
                                              if x[0][0] != "Uncategorized"])

    class_names = [
        "A3",
        "A4",
        "A5",  # class_names contains possible results
        "B3",
        "B4",
        "B5",
        "C3",
        "C4",
        "C5",
        "D3",
        "D4",
        "D5",
        "E3",
        "E4",
        "E5",
        "F3",
        "F4",
        "F5",
        "G3",
        "G4",
        "G5"
    ]

    # Fetch only the labels (note values) from the data.
    value_network_train_data_label = [
        item[0] for item in value_network_train_data_label
    ]

    # Assign the corresponding numerical values to labels.
    value_network_train_data_label_values_numerical = values_to_numerical(
        value_network_train_data_label, class_names)

    with tf.device(
            '/GPU:1'
    ):  # Specify using nvidia discrete GPU instead of Intel integrated graphics.
        construct_output(indent_level=0, message="Start training.")
        # Set up the layers.
        # The first layer in this network, tf.keras.layers.Flatten, transforms the format of the images
        # from a 2D array (200x200 px) to a 1D array (of 200x200 = 40000 pixels).
        # After the pixels are flattened, the network consists of a sequence of two tf.keras.layers.Dense layers.
        # These are densely connected, or fully connected, neural layers.
        # The first Dense layer has 128 nodes (or neurons).
        # The second (and last) layer returns an array with length of 22.
        # Each node contains a score that indicates that the current image belongs to one of the 22 classes.
        model = tf.keras.Sequential([
            tf.keras.layers.Flatten(input_shape=(200, 200)),
            tf.keras.layers.Dense(128, activation='relu'),
            tf.keras.layers.Dense(22)
        ])

        # Before the model is ready for training, it needs a few more settings.
        # These are added during the model's compile step:
        # Loss function —This measures how accurate the model is during training.
        # You want to minimize this function to "steer" the model in the right direction.
        # Optimizer —This is how the model is updated based on the data it sees and its loss function.
        # Metrics —Used to monitor the training and testing steps.
        # The following example uses accuracy, the fraction of the images that are correctly classified.
        model.compile(optimizer='adam',
                      loss=tf.keras.losses.SparseCategoricalCrossentropy(
                          from_logits=True),
                      metrics=['accuracy'])

        # Training the convolutional network model requires the following steps:
        # Feed the training data to the model.
        # In this example, the training data is in the train_images and train_labels arrays.
        # The model learns to associate images and labels.
        # You ask the model to make predictions about a test set—in this example, the test_images array.
        # Verify that the predictions match the labels from the test_labels array.
        model.fit(value_network_train_data_arr,
                  value_network_train_data_label_values_numerical,
                  epochs=3,
                  callbacks=[values_model_cb])
        construct_output(
            indent_level=0,
            message="Save the network weights to avoid retraining on every run."
        )

        # Attach a softmax layer to convert the logits to probabilities, which are easier to interpret.
        probability_model = tf.keras.Sequential(
            [model, tf.keras.layers.Softmax()])

        # TESTING THE NETWORK. =======================================================================================
        # Compare how the model performs on the test dataset.
        # value_network_test_data_label = [item[0] for item in value_network_test_data_label]
        # value_network_test_data_label_values_numerical = values_to_numerical(
        #     value_network_test_data_label,
        #     class_names)
        # test_loss, test_acc = model.evaluate(value_network_test_data_arr,
        #                                      value_network_test_data_label_values_numerical,
        #                                      verbose=2
        #                                      )
        # print('\nTest accuracy:', test_acc)
        # predictions = probability_model.predict(value_network_test_data_arr)
        # print(predictions[0])
        # print("max= ", np.argmax(predictions[0]))
        # import cv2
        # cv2.imshow("img", value_network_test_data_arr[0])
        # cv2.waitKey()

        construct_output(indent_level=0, message="End training.")
        construct_output(
            indent_level=0,
            message="Convolutional Network 1 (Note value determining) Done.")
Code Example #6
 def _custom_parsing_context(self):
     dev_spec = tf.DeviceSpec(
         device_type=("GPU" if self.device_option.is_gpu() else "CPU"),
         device_index=self.device_option.num)
     return tf.device(dev_spec)
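
A standalone sketch of the same idea, without the surrounding device_option object: build a tf.DeviceSpec and use it as a placement context.

dev_spec = tf.DeviceSpec(device_type="CPU", device_index=0)
with tf.device(dev_spec):
    y = tf.zeros([2, 2])  # ops created here are pinned to /CPU:0
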
Code Example #7
def main(unused_argv=None):
    tf.logging.set_verbosity(tf.logging.INFO)
    with tf.Graph().as_default():
        # Forces all input processing onto CPU in order to reserve the GPU for the
        # forward inference and back-propagation.
        device = '/cpu:0' if not FLAGS.ps_tasks else '/job:worker/cpu:0'
        with tf.device(
                tf.train.replica_device_setter(FLAGS.ps_tasks,
                                               worker_device=device)):
            # Loads content images.
            content_inputs_, _ = image_utils.imagenet_inputs(
                FLAGS.batch_size, FLAGS.image_size)

            # Loads style images.
            [style_inputs_, _, _] = image_utils.arbitrary_style_image_inputs(
                FLAGS.style_dataset_file,
                batch_size=FLAGS.batch_size,
                image_size=FLAGS.image_size,
                shuffle=True,
                center_crop=FLAGS.center_crop,
                augment_style_images=FLAGS.augment_style_images,
                random_style_image_size=FLAGS.random_style_image_size)

        with tf.device(tf.train.replica_device_setter(FLAGS.ps_tasks)):
            # Process style and content weight flags.
            content_weights = ast.literal_eval(FLAGS.content_weights)
            style_weights = ast.literal_eval(FLAGS.style_weights)

            # Define the model
            stylized_images, total_loss, loss_dict, \
                  _ = build_mobilenet_model.build_mobilenet_model(
                      content_inputs_,
                      style_inputs_,
                      mobilenet_trainable=False,
                      style_params_trainable=True,
                      transformer_trainable=True,
                      mobilenet_end_point='layer_19',
                      transformer_alpha=FLAGS.alpha,
                      style_prediction_bottleneck=100,
                      adds_losses=True,
                      content_weights=content_weights,
                      style_weights=style_weights,
                      total_variation_weight=FLAGS.total_variation_weight,
                  )

            # Adding scalar summaries to the tensorboard.
            for key, value in loss_dict.items():
                tf.summary.scalar(key, value)

            # Adding Image summaries to the tensorboard.
            tf.summary.image('image/0_content_inputs', content_inputs_, 3)
            tf.summary.image('image/1_style_inputs_aug', style_inputs_, 3)
            tf.summary.image('image/2_stylized_images', stylized_images, 3)

            # Set up training
            optimizer = tf.train.AdamOptimizer(FLAGS.learning_rate)
            train_op = slim.learning.create_train_op(
                total_loss,
                optimizer,
                clip_gradient_norm=FLAGS.clip_gradient_norm,
                summarize_gradients=False)

            # Function to restore VGG16 parameters.
            init_fn_vgg = slim.assign_from_checkpoint_fn(
                vgg.checkpoint_file(), slim.get_variables('vgg_16'))

            # Function to restore Mobilenet V2 parameters.
            mobilenet_variables_dict = {
                var.op.name: var
                for var in slim.get_model_variables('MobilenetV2')
            }
            init_fn_mobilenet = slim.assign_from_checkpoint_fn(
                FLAGS.mobilenet_checkpoint, mobilenet_variables_dict)

            # Function to restore VGG16 and Mobilenet V2 parameters.
            def init_sub_networks(session):
                init_fn_vgg(session)
                init_fn_mobilenet(session)

            # Run training
            slim.learning.train(train_op=train_op,
                                logdir=os.path.expanduser(FLAGS.train_dir),
                                master=FLAGS.master,
                                is_chief=FLAGS.task == 0,
                                number_of_steps=FLAGS.train_steps,
                                init_fn=init_sub_networks,
                                save_summaries_secs=FLAGS.save_summaries_secs,
                                save_interval_secs=FLAGS.save_interval_secs)
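
Scripts structured like this one typically hand control to main() through the TF1 app runner, assuming the FLAGS referenced above are defined elsewhere in the file:

if __name__ == '__main__':
    tf.app.run(main)
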
Code Example #8
def multi_hop_fact(qry_input_ids,
                   qry_input_mask,
                   qry_entity_ids,
                   entity_ids,
                   entity_mask,
                   ent2fact_ind,
                   ent2fact_val,
                   fact2ent_ind,
                   fact2ent_val,
                   fact2fact_ind,
                   fact2fact_val,
                   is_training,
                   use_one_hot_embeddings,
                   bert_config,
                   qa_config,
                   fact_mips_config,
                   num_hops,
                   exclude_set=None,
                   is_printing=True):
  """Multi-hops of propagation from input to output facts.

  Args:
    qry_input_ids:
    qry_input_mask:
    qry_entity_ids:
    entity_ids: (entity_word_ids) [num_entities, max_entity_len] Tensor holding
      word ids of each entity.
    entity_mask: (entity_word_masks) [num_entities, max_entity_len] Tensor with
      masks into word ids above.
    ent2fact_ind:
    ent2fact_val:
    fact2ent_ind:
    fact2ent_val:
    fact2fact_ind:
    fact2fact_val:
    is_training:
    use_one_hot_embeddings:
    bert_config:
    qa_config:
    fact_mips_config:
    num_hops:
    exclude_set:
    is_printing:

  Returns:
    layer_entities:
    layer_facts:
    layer_dense:
    layer_sp:
    batch_qry_entities:
    initial_facts:
    qry_seq_emb:
  """
  del entity_ids, entity_mask, exclude_set  # Not used for now.
  # MIPS search for facts.  Build fact feature Database
  with tf.device("/cpu:0"):
    tf_fact_db, fact_mips_search_fn = search_utils.create_mips_searcher(
        fact_mips_config.ckpt_var_name,
        # [fact_mips_config.num_facts, fact_mips_config.emb_size],
        fact_mips_config.ckpt_path,
        fact_mips_config.num_neighbors,
        local_var_name="scam_init_barrier_fact")

  # for question BOW embedding
  with tf.variable_scope("qry/bow"):
    # trainable word weights over the BERT vocab for all query embeddings.
    word_weights = tf.get_variable(
        "word_weights", [bert_config.vocab_size, 1],
        dtype=tf.float32,
        initializer=tf.ones_initializer())
  qry_seq_emb, word_emb_table = model_utils.shared_qry_encoder_v2(
      qry_input_ids, qry_input_mask, is_training, use_one_hot_embeddings,
      bert_config, qa_config)

  del word_weights, word_emb_table  # Not used for now.

  batch_size = tf.shape(qry_input_ids)[0]
  # Get question entities w/o scores.
  batch_qry_entities = tf.SparseTensor(
      indices=tf.concat([
          qry_entity_ids.indices[:, 0:1],
          tf.cast(tf.expand_dims(qry_entity_ids.values, 1), tf.int64)
      ],
                        axis=1),
      values=tf.ones_like(qry_entity_ids.values, dtype=tf.float32),
      dense_shape=[batch_size, qa_config.num_entities])
  # Prepare initial facts.
  initial_facts = model_utils.sparse_ragged_mul(
      batch_qry_entities,
      ent2fact_ind,
      ent2fact_val,
      batch_size,
      fact_mips_config.num_facts,
      "sum",  # max or sum
      threshold=None,
      fix_values_to_one=True)

  # Note: set a hyper parameter in qa.config
  # Note: can we do top k here for sparse tensor?
  # Limit the number of init facts such that we won't have too many facts.

  # mask = tf.greater(initial_facts.values, 1)  # >= 2 qry concepts
  # initial_facts = tf.sparse.retain(initial_facts, mask)

  scaled_initial_facts = maxscale_spare_tensor(initial_facts)
  mask_thresold = tf.greater(scaled_initial_facts.values, 0.25)
  final_initial_facts = tf.sparse.retain(scaled_initial_facts, mask_thresold)

  if is_printing:
    tmp_vals = final_initial_facts.values
    tmp_vals = tf.compat.v1.Print(
        input_=tmp_vals,
        data=[
            tf.shape(initial_facts.indices),
            initial_facts.values,
        ],
        message="-" * 100 + "\n\n ## Initial Facts (at hop 0):\n"
        "shape(initial_facts), initial_facts.values,",
        first_n=10,
        summarize=52)
    tmp_vals = tf.compat.v1.Print(
        input_=tmp_vals,
        data=[
            tf.shape(scaled_initial_facts.indices),
            scaled_initial_facts.values,
        ],
        message="shape(scaled_initial_facts), scaled_initial_facts.values,",
        first_n=10,
        summarize=52)
    tmp_vals = tf.compat.v1.Print(
        input_=tmp_vals,
        data=[
            tf.shape(final_initial_facts.indices),
            final_initial_facts.values,
        ],
        message="shape(final_initial_facts), final_initial_facts.values,",
        first_n=10,
        summarize=52)

    final_initial_facts = tf.SparseTensor(final_initial_facts.indices, tmp_vals,
                                          final_initial_facts.dense_shape)
  layer_facts, layer_entities = [], []
  layer_dense, layer_sp = [], []
  batch_facts = final_initial_facts
  for hop in range(num_hops):
    with tf.name_scope("hop_%d" % hop):
      # The question start/end embeddings for each hop.
      qry_start_emb, qry_end_emb = model_utils.layer_qry_encoder(
          qry_seq_emb,
          qry_input_ids,
          qry_input_mask,
          is_training,
          bert_config,
          qa_config,
          suffix="_%d" % hop,
          project_dim=qa_config.projection_dim)  # project=True
      ret_entities, ret_facts, _, _ = follow_fact(
          batch_facts, qry_start_emb, qry_end_emb, fact2fact_ind, fact2fact_val,
          fact2ent_ind, fact2ent_val, fact_mips_search_fn, tf_fact_db,
          fact_mips_config, qa_config, is_training, hop, is_printing)
      batch_facts = ret_facts  # Update to next hop.
      # Update results.
      layer_facts.append(ret_facts)
      layer_entities.append(ret_entities)

  tf.logging.info("len layer_facts: %d", len(layer_facts))
  tf.logging.info("len layer_entities: %d", len(layer_entities))
  return (layer_entities, layer_facts, layer_dense, layer_sp,
          batch_qry_entities, initial_facts, qry_seq_emb)
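
maxscale_spare_tensor() is defined elsewhere in the module; a plausible sketch, assuming it rescales a SparseTensor's values by their maximum (mirroring the value scaling done later in follow_fact):

def maxscale_sparse_tensor(sp_tensor):
    # Hypothetical reconstruction (the original spells the name "maxscale_spare_tensor"):
    # divide every stored value by the global maximum so that scores fall in (0, 1].
    return tf.SparseTensor(
        indices=sp_tensor.indices,
        values=sp_tensor.values / tf.reduce_max(sp_tensor.values),
        dense_shape=sp_tensor.dense_shape)
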
Code Example #9
def multi_hop_mention(qry_input_ids,
                      qry_input_mask,
                      qry_entity_ids,
                      entity_ids,
                      entity_mask,
                      ent2ment_ind,
                      ent2ment_val,
                      ment2ent_map,
                      is_training,
                      use_one_hot_embeddings,
                      bert_config,
                      qa_config,
                      mips_config,
                      num_hops,
                      exclude_set=None,
                      bridge_mentions=None,
                      answer_mentions=None):  # answer mentions?
  """Multi-hops of propagation from input to output entities.

  Args:
    qry_input_ids:
    qry_input_mask:
    qry_entity_ids:
    entity_ids: (entity_word_ids) [num_entities, max_entity_len] Tensor holding
      word ids of each entity.
    entity_mask: (entity_word_masks) [num_entities, max_entity_len] Tensor with
      masks into word ids above.
    ent2ment_ind:
    ent2ment_val:
    ment2ent_map:
    is_training:
    use_one_hot_embeddings:
    bert_config:
    qa_config:
    mips_config:
    num_hops:
    exclude_set:
    bridge_mentions:
    answer_mentions:

  Returns:
    layer_entities:
    layer_mentions:
    layer_dense:
    layer_sp:
    batch_entities_nosc:
    qry_seq_emb:
  """
  # for question BOW embedding
  with tf.variable_scope("qry/bow"):
    # Note: trainable word weights over the BERT vocab for query
    word_weights = tf.get_variable(
        "word_weights", [bert_config.vocab_size, 1],
        dtype=tf.float32,
        initializer=tf.ones_initializer())
  # Note: we can use the [CLS] token here?
  qry_seq_emb, word_emb_table = model_utils.shared_qry_encoder_v2(
      qry_input_ids, qry_input_mask, is_training, use_one_hot_embeddings,
      bert_config, qa_config)

  batch_size = tf.shape(qry_input_ids)[0]
  # Multiple entities per question. We need to re-score.
  with tf.name_scope("entity_linking"):
    batch_entity_emb = model_utils.entity_emb(
        tf.cast(qry_entity_ids.values, tf.int64), entity_ids, entity_mask,
        word_emb_table, word_weights)  # question entity embeddings.
    # Embed query into start and end vectors for dense retrieval for a hop.
    qry_el_emb, _ = model_utils.layer_qry_encoder(  # question embeddings
        qry_seq_emb,
        qry_input_ids,
        qry_input_mask,
        is_training,
        bert_config,
        qa_config,
        suffix="_el",
        project=False)
    batch_qry_el_emb = tf.gather(qry_el_emb, qry_entity_ids.indices[:, 0])
    batch_entity_el_scs = tf.reduce_sum(batch_qry_el_emb * batch_entity_emb, -1)
    batch_entities_nosc = tf.SparseTensor(
        # Note: double check this.
        indices=tf.concat([
            qry_entity_ids.indices[:, 0:1],
            tf.cast(tf.expand_dims(qry_entity_ids.values, 1), tf.int64)
        ],
                          axis=1),
        values=batch_entity_el_scs,
        dense_shape=[batch_size, qa_config.num_entities])
    batch_entities = tf.sparse.softmax(tf.sparse.reorder(batch_entities_nosc))

  ensure_mentions = bridge_mentions  # Note: check "supporting facts"

  with tf.device("/cpu:0"):
    # MIPS search for mentions. Mention Feature Database
    tf_db, mips_search_fn = search_utils.create_mips_searcher(
        mips_config.ckpt_var_name,
        # [mips_config.num_mentions, mips_config.emb_size],
        mips_config.ckpt_path,
        mips_config.num_neighbors,
        local_var_name="scam_init_barrier")
  layer_mentions, layer_entities = [], []
  layer_dense, layer_sp = [], []
  for hop in range(num_hops):
    with tf.name_scope("hop_%d" % hop):
      # Note: the question start/end embeddings for each hop?
      qry_start_emb, qry_end_emb = model_utils.layer_qry_encoder(
          qry_seq_emb,
          qry_input_ids,
          qry_input_mask,
          is_training,
          bert_config,
          qa_config,
          suffix="_%d" % hop)  # project=True

      (ret_entities, ret_mentions,
       dense_mention_vec, sp_mention_vec) = follow_mention(
           batch_entities, qry_start_emb, qry_end_emb, entity_ids, entity_mask,
           ent2ment_ind, ent2ment_val, ment2ent_map, word_emb_table,
           word_weights, mips_search_fn, tf_db, bert_config.hidden_size,
           mips_config, qa_config, is_training, ensure_mentions)
      # Note:  check this. Shouldn't for wrong choices.
      if exclude_set:
        # batch_ind = tf.expand_dims(tf.range(batch_size), 1)
        exclude_indices = tf.concat([
            tf.cast(exclude_set.indices[:, 0:1], tf.int64),
            tf.cast(tf.expand_dims(exclude_set.values, 1), tf.int64)
        ],
                                    axis=1)
        ret_entities = model_utils.remove_from_sparse(ret_entities,
                                                      exclude_indices)
      ret_entities = tf.sparse.reorder(ret_entities)
      scaled_entities = tf.SparseTensor(
          indices=ret_entities.indices,
          values=ret_entities.values / qa_config.softmax_temperature,
          dense_shape=ret_entities.dense_shape)
      batch_entities = tf.sparse.softmax(scaled_entities)  # entities updated.

      ### Start of debugging w/ tf.Print ###
      tmp_vals = batch_entities.values
      tmp_vals = tf.compat.v1.Print(
          input_=tmp_vals,
          data=[
              ret_entities.indices,
          ],
          message="ret_entities.indices at hop %d \n" % hop,
          first_n=10,
          summarize=50)
      tmp_vals = tf.compat.v1.Print(
          input_=tmp_vals,
          data=[
              ret_entities.values,
          ],
          message="ret_entities.values at hop %d \n" % hop,
          first_n=10,
          summarize=25)
      tmp_vals = tf.compat.v1.Print(
          input_=tmp_vals,
          data=[
              batch_entities.indices,
          ],
          message="scaled_entities.indices at hop %d \n" % hop,
          first_n=10,
          summarize=50)
      tmp_vals = tf.compat.v1.Print(
          input_=tmp_vals,
          data=[
              batch_entities.values,
          ],
          message="scaled_entities.values at hop %d \n" % hop,
          first_n=10,
          summarize=25)
      batch_entities = tf.SparseTensor(
          indices=batch_entities.indices,
          values=tmp_vals,
          dense_shape=batch_entities.dense_shape)
      ### End of debugging w/ tf.Print ###

      ensure_mentions = answer_mentions  # Note: seems not helpful now?
      layer_mentions.append(ret_mentions)
      layer_entities.append(ret_entities)  # Note that this is not softmaxed.
      layer_dense.append(dense_mention_vec)
      layer_sp.append(sp_mention_vec)

  return (layer_entities, layer_mentions, layer_dense, layer_sp,
          batch_entities_nosc, qry_seq_emb)
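
A toy illustration of the temperature-scaled sparse softmax used for batch_entities above; the indices and values are arbitrary.

logits = tf.SparseTensor(indices=[[0, 0], [0, 2], [1, 1]],
                         values=[2.0, 1.0, 3.0],
                         dense_shape=[2, 4])
temperature = 0.5  # plays the role of qa_config.softmax_temperature
scaled = tf.SparseTensor(logits.indices, logits.values / temperature,
                         logits.dense_shape)
probs = tf.sparse.softmax(tf.sparse.reorder(scaled))  # row-wise softmax over stored entries
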
Code Example #10
def follow_fact(
    batch_facts,
    relation_st_qry,
    relation_en_qry,
    fact2fact_ind,
    fact2fact_val,
    fact2ent_ind,
    fact2ent_val,
    fact_mips_search_fn,
    tf_fact_db,
    fact_mips_config,
    qa_config,
    is_training,
    hop_id=0,
    is_printing=True,
):
  """Sparse implementation of the relation follow operation.

  Args:
    batch_facts: [batch_size, num_facts] SparseTensor of incoming facts and
      their scores.
    relation_st_qry: [batch_size, dim] Tensor representing start query vectors
      for dense retrieval.
    relation_en_qry: [batch_size, dim] Tensor representing end query vectors
      for dense retrieval.
    fact2fact_ind: [num_facts, num_facts] RaggedTensor mapping facts to fact
      indices which co-occur with them.
    fact2fact_val: [num_facts, num_facts] RaggedTensor mapping facts to fact
      scores which co-occur with them.
    fact2ent_ind: [num_facts, num_entities] RaggedTensor mapping facts to entity
      indices which co-occur with them.
    fact2ent_val: [num_facts, num_entities] RaggedTensor mapping facts to entity
      scores which co-occur with them.
    fact_mips_search_fn: Function which accepts a dense query vector and returns
      the top-k indices closest to it (from the tf_fact_db).
    tf_fact_db: [num_facts, 2 * dim] Tensor of fact representations.
    fact_mips_config: MIPS Config object.
    qa_config: QAConfig object.
    is_training: Boolean.
    hop_id: int, the current hop id.
    is_printing: if print results for debugging.

  Returns:
    ret_entities: [batch_size, num_entities] Tensor of retrieved entities.
    ret_facts: [batch_size, num_facts] Tensor of retrieved facts.
    dense_fact_vec: [batch_size, num_facts] Tensor of retrieved facts (dense).
    sp_fact_vec: [batch_size, num_facts] Tensor of retrieved facts (sparse).
  """
  num_facts = fact_mips_config.num_facts
  batch_size = batch_facts.dense_shape[0]  # number of examples in a batch
  example_ind = batch_facts.indices[:, 0]  # the list of the example ids
  fact_ind = batch_facts.indices[:, 1]  # the list of the fact ids
  fact_scs = batch_facts.values  # the list of the scores of each fact
  uniq_original_example_ind, uniq_local_example_idx = tf.unique(example_ind)
  # uniq_original_example_ind: local to original example id
  # uniq_local_example_idx: a list of local example id
  # tf.shape(uniq_original_example_ind)[0] = num_examples
  if qa_config.fact_score_threshold is not None:
    # Remove the facts which have scores lower than the threshold.
    mask = tf.greater(batch_facts.values, qa_config.fact_score_threshold)
    batch_facts = tf.sparse.retain(batch_facts, mask)
  # Sparse: Ragged sparse search from the current facts to the next facts.
  # (num_batch x num_facts) X (num_facts x num_facts)
  # [batch_size x num_facts] sparse
  if hop_id > 0:
    sp_fact_vec = model_utils.sparse_ragged_mul(
        batch_facts,
        fact2fact_ind,
        fact2fact_val,
        batch_size,
        num_facts,
        "sum",  # Note: check this.
        threshold=None,
        fix_values_to_one=True)
    # Note: find a better way for this.
    mask = tf.greater(sp_fact_vec.values, 3)  # 1/0.2 = 5
    sp_fact_vec = tf.sparse.retain(sp_fact_vec, mask)
  else:
    # For the first hop, we use the initial facts themselves,
    # because the sparse retrieval has already been done from the question.
    sp_fact_vec = batch_facts

  # Note: Remove the previous hop's facts
  # Note: Limit the number of fact followers.

  # Dense: Aggregate the facts in each batch as a single fact embedding vector.
  fact_embs = tf.gather(tf_fact_db, fact_ind)  # len(fact_ind) X 2dim
  # Note: check, does mean make sense?
  # sum if it was softmaxed
  # mean..
  del fact_scs  # Not used for now.
  # fact_embs = fact_embs * tf.expand_dims(fact_scs, axis=1)  #batch_fact.values
  ### Start of debugging w/ tf.Print ###
  if is_printing:
    fact_embs = tf.compat.v1.Print(
        input_=fact_embs,
        data=[tf.shape(batch_facts.indices)[0], batch_facts.indices],
        message="\n\n###\n batch_facts.indices and total #facts at hop %d \n" %
        hop_id,
        first_n=10,
        summarize=50)
    fact_embs = tf.compat.v1.Print(
        input_=fact_embs,
        data=[
            batch_facts.values,
        ],
        message="batch_facts.values at hop %d \n" % hop_id,
        first_n=10,
        summarize=25)
    fact_embs = tf.compat.v1.Print(
        input_=fact_embs,
        data=[tf.shape(sp_fact_vec.indices)[0], sp_fact_vec.indices],
        message="\n Sparse Fact Results @ hop %d \n" % hop_id +
        " sp_fact_vec.indices at hop %d \n" % hop_id,
        first_n=10,
        summarize=50)
    fact_embs = tf.compat.v1.Print(
        input_=fact_embs,
        data=[
            sp_fact_vec.values,
        ],
        message="sp_fact_vec.values at hop %d \n" % hop_id,
        first_n=10,
        summarize=25)
  ### End of debugging w/ tf.Print ###

  agg_emb = tf.math.unsorted_segment_mean(
      fact_embs, uniq_local_example_idx,
      tf.shape(uniq_original_example_ind)[0])
  batch_fact_emb = tf.scatter_nd(
      tf.expand_dims(uniq_original_example_ind, 1), agg_emb,
      tf.stack([batch_size, 2 * qa_config.projection_dim], axis=0))
  # Each instance in a batch has only one vector as the overall fact emb.
  batch_fact_emb.set_shape([None, 2 * qa_config.projection_dim])

  # Note: Normalize the embeddings if they are not from SoftMax.
  # batch_fact_emb = tf.nn.l2_normalize(batch_fact_emb, axis=1)

  # Dense scam search.
  # [batch_size, 2 * dim]
  # Note: reform query embeddings.
  scam_qrys = batch_fact_emb + tf.concat([relation_st_qry, relation_en_qry],
                                         axis=1)
  with tf.device("/cpu:0"):
    # [batch_size, num_neighbors]
    _, ret_fact_ids = fact_mips_search_fn(scam_qrys)
    # [batch_size, num_neighbors, 2 * dim]
    ret_fact_emb = tf.gather(tf_fact_db, ret_fact_ids)

  if qa_config.l2_normalize_db:
    ret_fact_emb = tf.nn.l2_normalize(ret_fact_emb, axis=2)
  # [batch_size, 1, num_neighbors]
  # The score of a fact is its inner product with qry.
  ret_fact_scs = tf.matmul(
      tf.expand_dims(scam_qrys, 1), ret_fact_emb, transpose_b=True)
  # [batch_size, num_neighbors]
  ret_fact_scs = tf.squeeze(ret_fact_scs, 1)
  # [batch_size, num_facts] sparse
  dense_fact_vec = model_utils.convert_search_to_vector(
      ret_fact_scs, ret_fact_ids, tf.cast(batch_size, tf.int32),
      fact_mips_config.num_neighbors, fact_mips_config.num_facts)

  # Combine sparse and dense search.
  if (is_training and qa_config.train_with_sparse) or (
      (not is_training) and qa_config.predict_with_sparse):
    # [batch_size, num_mentions] sparse
    if qa_config.sparse_strategy == "dense_first":
      ret_fact_vec = model_utils.sp_sp_matmul(dense_fact_vec, sp_fact_vec)
    elif qa_config.sparse_strategy == "sparse_first":
      with tf.device("/cpu:0"):
        ret_fact_vec = model_utils.rescore_sparse(sp_fact_vec, tf_fact_db,
                                                  scam_qrys)
    else:
      raise ValueError("Unrecognized sparse_strategy %s" %
                       qa_config.sparse_strategy)
  else:
    # [batch_size, num_facts] sparse
    ret_fact_vec = dense_fact_vec

  # # Scaling facts with SoftMax.
  ret_fact_vec = tf.sparse.reorder(ret_fact_vec)
  # max_ip_scores = tf.reduce_max(ret_fact_vec.values)
  # min_ip_scores = tf.reduce_min(ret_fact_vec.values)
  # range_ip_scores = max_ip_scores - min_ip_scores
  # scaled_values = (ret_fact_vec.values - min_ip_scores) / range_ip_scores
  scaled_facts = tf.SparseTensor(
      indices=ret_fact_vec.indices,
      values=ret_fact_vec.values / tf.reduce_max(ret_fact_vec.values),
      dense_shape=ret_fact_vec.dense_shape)
  # ret_fact_vec_sf = tf.sparse.softmax(scaled_facts)
  ret_fact_vec_sf = scaled_facts

  # Remove the facts which have scores lower than the threshold.
  mask = tf.greater(ret_fact_vec_sf.values, 0.5)  # Must be larger than max/2
  ret_fact_vec_sf_fitered = tf.sparse.retain(ret_fact_vec_sf, mask)

  # Note: add a soft way to score (all) the entities based on the facts.
  # Note: maybe use the pre-computed (tf-idf) similarity score here. e2e
  # Retrieve entities before Fact-SoftMaxing
  ret_entities_nosc = model_utils.sparse_ragged_mul(
      ret_fact_vec_sf,  # Use the non-filtered scores of the retrieved facts.
      fact2ent_ind,
      fact2ent_val,
      batch_size,
      qa_config.num_entities,
      "sum",
      threshold=qa_config.fact_score_threshold,
      fix_values_to_one=True)

  ret_entities = tf.SparseTensor(
      indices=ret_entities_nosc.indices,
      values=ret_entities_nosc.values / tf.reduce_max(ret_entities_nosc.values),
      dense_shape=ret_entities_nosc.dense_shape)

  ### Start of debugging w/ tf.Print ###
  if is_printing:
    tmp_vals = ret_entities.values

    tmp_vals = tf.compat.v1.Print(
        input_=tmp_vals,
        data=[tf.shape(ret_fact_vec.indices)[0], ret_fact_vec.indices],
        message="\n\n-rescored- ret_fact_vec.indices at hop %d \n" % hop_id,
        first_n=10,
        summarize=51)
    tmp_vals = tf.compat.v1.Print(
        input_=tmp_vals,
        data=[
            ret_fact_vec.values,
        ],
        message="-rescored- ret_fact_vec.values at hop %d \n" % hop_id,
        first_n=10,
        summarize=25)
    tmp_vals = tf.compat.v1.Print(
        input_=tmp_vals,
        data=[
            ret_fact_vec_sf.values,
        ],
        message="ret_fact_vec_sf.values at hop %d \n" % hop_id,
        first_n=10,
        summarize=25)
    tmp_vals = tf.compat.v1.Print(
        input_=tmp_vals,
        data=[
            tf.shape(ret_fact_vec_sf_fitered.values),
            ret_fact_vec_sf_fitered.values,
        ],
        message="ret_fact_vec_sf_fitered.values at hop %d \n" % hop_id,
        first_n=10,
        summarize=25)
    ret_entities = tf.SparseTensor(
        indices=ret_entities.indices,
        values=tmp_vals,
        dense_shape=ret_entities.dense_shape)
  ### End of debugging w/ tf.Print ###

  return ret_entities, ret_fact_vec_sf_fitered, None, None
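
The thresholding idiom used repeatedly above (build a boolean mask over the stored values, then tf.sparse.retain) in isolation, with toy numbers:

scores = tf.SparseTensor(indices=[[0, 0], [0, 1], [1, 0]],
                         values=[0.9, 0.2, 0.7],
                         dense_shape=[2, 3])
mask = tf.greater(scores.values, 0.5)
filtered = tf.sparse.retain(scores, mask)  # keeps only the entries with score > 0.5
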
Code Example #11
def follow_mention(batch_entities,
                   relation_st_qry,
                   relation_en_qry,
                   entity_word_ids,
                   entity_word_masks,
                   ent2ment_ind,
                   ent2ment_val,
                   ment2ent_map,
                   word_emb_table,
                   word_weights,
                   mips_search_fn,
                   tf_db,
                   hidden_size,
                   mips_config,
                   qa_config,
                   is_training,
                   ensure_index=None):
  """Sparse implementation of the relation follow operation.

  Args:
    batch_entities: [batch_size, num_entities] SparseTensor of incoming entities
      and their scores.
    relation_st_qry: [batch_size, dim] Tensor representing start query vectors
      for dense retrieval.
    relation_en_qry: [batch_size, dim] Tensor representing end query vectors
      for dense retrieval.
    entity_word_ids: [num_entities, max_entity_len] Tensor holding word ids of
      each entity.
    entity_word_masks: [num_entities, max_entity_len] Tensor with masks into
      word ids above.
    ent2ment_ind: [num_entities, num_mentions] RaggedTensor mapping entities to
      mention indices which co-occur with them.
    ent2ment_val: [num_entities, num_mentions] RaggedTensor mapping entities to
      mention scores which co-occur with them.
    ment2ent_map: [num_mentions] Tensor mapping mentions to their entities.
    word_emb_table: [vocab_size, dim] Tensor of word embeddings.  (?)
    word_weights: [vocab_size, 1] Tensor of word weights.  (?)
    mips_search_fn: Function which accepts a dense query vector and returns the
      top-k indices closest to it (from the tf_db).
    tf_db: [num_mentions, 2 * dim] Tensor of mention representations.
    hidden_size: Scalar dimension of word embeddings.
    mips_config: MIPSConfig object.
    qa_config: QAConfig object.
    is_training: Boolean.
    ensure_index: [batch_size] Tensor of mention ids. Only needed if
      `is_training` is True.  (? each example only one ensure entity?)

  Returns:
    ret_mentions_ids: [batch_size, k] Tensor of retrieved mention ids.
    ret_mentions_scs: [batch_size, k] Tensor of retrieved mention scores.
    ret_entities_ids: [batch_size, k] Tensor of retrieved entities ids.
  """
  if qa_config.entity_score_threshold is not None:
    # Remove the entities which have scores lower than the threshold.
    mask = tf.greater(batch_entities.values, qa_config.entity_score_threshold)
    batch_entities = tf.sparse.retain(batch_entities, mask)
  batch_size = batch_entities.dense_shape[0]  # number of examples in the batch
  batch_ind = batch_entities.indices[:, 0]  # the list of the batch ids
  entity_ind = batch_entities.indices[:, 1]  # the list of the entity ids
  entity_scs = batch_entities.values  # the list of the scores of each entity

  # Obtain BOW embeddings for the given set of entities.
  # [NNZ, dim]  NNZ (number of non-zero entries) = len(entity_ind)
  batch_entity_emb = model_utils.entity_emb(entity_ind, entity_word_ids,
                                            entity_word_masks, word_emb_table,
                                            word_weights)
  batch_entity_emb = batch_entity_emb * tf.expand_dims(entity_scs, axis=1)
  # [batch_size, dim]
  uniq_batch_ind, uniq_idx = tf.unique(batch_ind)
  agg_emb = tf.unsorted_segment_sum(batch_entity_emb, uniq_idx,
                                    tf.shape(uniq_batch_ind)[0])
  batch_bow_emb = tf.scatter_nd(
      tf.expand_dims(uniq_batch_ind, 1), agg_emb,
      tf.stack([batch_size, hidden_size], axis=0))
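  # The two ops above sum the score-weighted entity embeddings per unique batch id
  # and scatter each sum back to its batch row, yielding a dense [batch_size, dim] matrix.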
  batch_bow_emb.set_shape([None, hidden_size])
  if qa_config.projection_dim is not None:
    with tf.variable_scope("projection"):
      batch_bow_emb = contrib_layers.fully_connected(
          batch_bow_emb,
          qa_config.projection_dim,
          activation_fn=tf.nn.tanh,
          reuse=tf.AUTO_REUSE,
          scope="bow_projection")
  # Each instance in a batch now has only one vector as its embedding.

  # Ragged sparse search.
  # (num_batch x num_entities) * (num_entities x num_mentions)
  # [batch_size x num_mentions] sparse
  sp_mention_vec = model_utils.sparse_ragged_mul(
      batch_entities,
      ent2ment_ind,
      ent2ment_val,
      batch_size,
      mips_config.num_mentions,
      qa_config.sparse_reduce_fn,  # max or sum
      threshold=qa_config.entity_score_threshold,
      fix_values_to_one=qa_config.fix_sparse_to_one)
  if is_training and qa_config.ensure_answer_sparse:
    ensure_indices = tf.stack([tf.range(batch_size), ensure_index], axis=-1)
    sp_ensure_vec = tf.SparseTensor(
        tf.cast(ensure_indices, tf.int64),
        tf.ones([batch_size]),
        dense_shape=[batch_size, mips_config.num_mentions])
    sp_mention_vec = tf.sparse.add(sp_mention_vec, sp_ensure_vec)
    sp_mention_vec = tf.SparseTensor(
        indices=sp_mention_vec.indices,
        values=tf.minimum(1., sp_mention_vec.values),
        dense_shape=sp_mention_vec.dense_shape)

  # Dense scam search.
  # [batch_size, 2 * dim]
  # Construct query embeddings (dual encoder: [subject; relation]).
  scam_qrys = tf.concat(
      [batch_bow_emb + relation_st_qry, batch_bow_emb + relation_en_qry],
      axis=1)
  with tf.device("/cpu:0"):
    # [batch_size, num_neighbors]
    _, ret_mention_ids = mips_search_fn(scam_qrys)
    if is_training and qa_config.ensure_answer_dense:
      ret_mention_ids = model_utils.ensure_values_in_mat(
          ret_mention_ids, ensure_index, tf.int32)
    # [batch_size, num_neighbors, 2 * dim]
    ret_mention_emb = tf.gather(tf_db, ret_mention_ids)

  if qa_config.l2_normalize_db:
    ret_mention_emb = tf.nn.l2_normalize(ret_mention_emb, axis=2)
  # [batch_size, 1, num_neighbors]
  ret_mention_scs = tf.matmul(
      tf.expand_dims(scam_qrys, 1), ret_mention_emb, transpose_b=True)
  # [batch_size, num_neighbors]
  ret_mention_scs = tf.squeeze(ret_mention_scs, 1)
  # [batch_size, num_mentions] sparse
  dense_mention_vec = model_utils.convert_search_to_vector(
      ret_mention_scs, ret_mention_ids, tf.cast(batch_size, tf.int32),
      mips_config.num_neighbors, mips_config.num_mentions)

  # Combine sparse and dense search.
  if (is_training and qa_config.train_with_sparse) or (
      (not is_training) and qa_config.predict_with_sparse):
    # [batch_size, num_mentions] sparse
    if qa_config.sparse_strategy == "dense_first":
      ret_mention_vec = model_utils.sp_sp_matmul(dense_mention_vec,
                                                 sp_mention_vec)
    elif qa_config.sparse_strategy == "sparse_first":
      with tf.device("/cpu:0"):
        ret_mention_vec = model_utils.rescore_sparse(sp_mention_vec, tf_db,
                                                     scam_qrys)
    else:
      raise ValueError("Unrecognized sparse_strategy %s" %
                       qa_config.sparse_strategy)
  else:
    # [batch_size, num_mentions] sparse
    ret_mention_vec = dense_mention_vec

  # Get entity scores and ids.
  # [batch_size, num_entities] sparse
  entity_indices = tf.cast(
      tf.gather(ment2ent_map, ret_mention_vec.indices[:, 1]), tf.int64)
  ret_entity_vec = tf.SparseTensor(
      indices=tf.concat(
          [ret_mention_vec.indices[:, 0:1],
           tf.expand_dims(entity_indices, 1)],
          axis=1),
      values=ret_mention_vec.values,
      dense_shape=[batch_size, qa_config.num_entities])

  return ret_entity_vec, ret_mention_vec, dense_mention_vec, sp_mention_vec
コード例 #12
0
def input_producer(raw_data,
                   batch_size,
                   num_steps,
                   shuffle=False,
                   randomize=False,
                   random_len=False):
    """Produces graph-based input for Penn Treebank.

  Args:
    raw_data: np tensor of size [num_words].
    batch_size: self-explanatory.
    num_steps: number of BPTT steps.
    shuffle: whether to shuffle sentences.
    randomize: use random segments instead of the continuous corpus.
    random_len: random sequence len.

  Returns:
    If `random_len` is set, the tuple (x, y, num_batches_per_epoch,
      reset_start_idx, should_reset, base_bptt), where `should_reset`
      indicates whether the end of the corpus has been reached.
    Otherwise, the tuple (x, y, num_batches_per_epoch).
  """

    num_batches_per_epoch = (
        (np.size(raw_data) // batch_size) - 1) // num_steps
    raw_data = tf.convert_to_tensor(raw_data, name='raw_data', dtype=tf.int32)

    data_len = tf.size(raw_data)
    batch_len = data_len // batch_size
    data = tf.reshape(raw_data[0:batch_size * batch_len],
                      [batch_size, batch_len])

    epoch_size = (batch_len - 1) // num_steps
    with tf.device('/cpu:0'):
        epoch_size = tf.identity(epoch_size, name='epoch_size')

        if random_len:
            start_idx = tf.Variable(0,
                                    name='start_idx',
                                    dtype=tf.int32,
                                    trainable=False)
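            # With probability 0.95 keep the full BPTT length, otherwise halve it
            # (variable-length BPTT in the style of AWD-LSTM training).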
            base_bptt = tf.cond(
                tf.random_uniform(shape=(), minval=0., maxval=1.) < 0.95,
                lambda: tf.cast(num_steps, dtype=tf.float32),
                lambda: tf.cast(num_steps, dtype=tf.float32) / 2.)
            seq_len = tf.random.truncated_normal(shape=(),
                                                 mean=base_bptt,
                                                 stddev=5.,
                                                 dtype=tf.float32)
            seq_len = tf.cast(seq_len, dtype=tf.int32)
            seq_len = tf.minimum(seq_len,
                                 num_steps + 20)  # seq_len <= bptt + 20
            seq_len = tf.minimum(seq_len, batch_len - start_idx - 1)
            end_idx = start_idx + seq_len

            x = data[:, start_idx:end_idx]
            y = data[:, start_idx + 1:end_idx + 1]

            with tf.control_dependencies([x, y]):
                with tf.control_dependencies([tf.assign(start_idx, end_idx)]):
                    should_reset = tf.greater_equal(end_idx, batch_len - 3)

            reset_start_idx = tf.assign(start_idx, 0)
            return (x, y, num_batches_per_epoch, reset_start_idx, should_reset,
                    base_bptt)

        if randomize:
            i = tf.random_uniform([1],
                                  minval=0,
                                  maxval=batch_len - num_steps,
                                  dtype=tf.int32)
            x = tf.strided_slice(data, [0, i], [batch_size, i + num_steps])
            y = tf.strided_slice(data, [0, i + 1],
                                 [batch_size, i + num_steps + 1])
        else:
            i = tf.train.range_input_producer(epoch_size,
                                              shuffle=shuffle).dequeue()
            x = tf.strided_slice(data, [0, i * num_steps],
                                 [batch_size, (i + 1) * num_steps])
            y = tf.strided_slice(data, [0, i * num_steps + 1],
                                 [batch_size, (i + 1) * num_steps + 1])
        x.set_shape([batch_size, num_steps])
        y.set_shape([batch_size, num_steps])

        return x, y, num_batches_per_epoch
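
The variable-length branch above returns extra control ops; the following is a minimal usage sketch (assumed, not from the original project) of how they might be consumed in a TF1 session loop:

import numpy as np
import tensorflow.compat.v1 as tf

raw_data = np.random.randint(0, 10000, size=[100000])  # hypothetical token ids
x, y, _, reset_start_idx, should_reset, _ = input_producer(
    raw_data, batch_size=32, num_steps=35, random_len=True)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for _ in range(1000):
        _, reset = sess.run([x, should_reset])  # y would feed the language-model loss
        if reset:
            sess.run(reset_start_idx)  # wrap back to the start of the corpus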
コード例 #13
0
    def __init__(self, batch_env, step, is_training, should_log, config):
        """Create an instance of the PPO algorithm.

    Args:
      batch_env: In-graph batch environment.
      step: Integer tensor holding the current training step.
      is_training: Boolean tensor for whether the algorithm should train.
      should_log: Boolean tensor for whether summaries should be returned.
      config: Object containing the agent configuration as attributes.
    """
        self._batch_env = batch_env
        self._step = step
        self._is_training = is_training
        self._should_log = should_log
        self._config = config
        self._observ_filter = normalize.StreamingNormalize(
            self._batch_env.observ[0],
            center=True,
            scale=True,
            clip=5,
            name='normalize_observ')
        self._reward_filter = normalize.StreamingNormalize(
            self._batch_env.reward[0],
            center=False,
            scale=True,
            clip=10,
            name='normalize_reward')
        # Memory stores tuple of observ, action, mean, logstd, reward.
        template = (self._batch_env.observ[0], self._batch_env.action[0],
                    self._batch_env.action[0], self._batch_env.action[0],
                    self._batch_env.reward[0])
        self._memory = memory.EpisodeMemory(template, config.update_every,
                                            config.max_length, 'memory')
        self._memory_index = tf.Variable(0, False)
        use_gpu = self._config.use_gpu and utility.available_gpus()
        with tf.device('/gpu:0' if use_gpu else '/cpu:0'):
            # Create network variables for later calls to reuse.
            self._network(tf.zeros_like(self._batch_env.observ)[:, None],
                          tf.ones(len(self._batch_env)),
                          reuse=None)
            cell = self._config.network(self._batch_env.action.shape[1].value)
            with tf.variable_scope('ppo_temporary'):
                self._episodes = memory.EpisodeMemory(template, len(batch_env),
                                                      config.max_length,
                                                      'episodes')
                self._last_state = utility.create_nested_vars(
                    cell.zero_state(len(batch_env), tf.float32))
                self._last_action = tf.Variable(tf.zeros_like(
                    self._batch_env.action),
                                                False,
                                                name='last_action')
                self._last_mean = tf.Variable(tf.zeros_like(
                    self._batch_env.action),
                                              False,
                                              name='last_mean')
                self._last_logstd = tf.Variable(tf.zeros_like(
                    self._batch_env.action),
                                                False,
                                                name='last_logstd')
        self._penalty = tf.Variable(self._config.kl_init_penalty,
                                    False,
                                    dtype=tf.float32)
        self._policy_optimizer = self._config.policy_optimizer(
            self._config.policy_lr, name='policy_optimizer')
        self._value_optimizer = self._config.value_optimizer(
            self._config.value_lr, name='value_optimizer')
コード例 #14
0
def train(args, build_train_graph):
    """Trains the model."""

    if args.verbose:
        tf.logging.set_verbosity(tf.logging.INFO)
    else:
        tf.logging.set_verbosity(tf.logging.ERROR)

    # Create input data pipeline.
    with tf.device("/cpu:0"):
        train_files = glob.glob(args.train_glob)
        if not train_files:
            raise RuntimeError(
                "No training images found with glob '{}'.".format(args.train_glob))
        train_dataset = tf.data.Dataset.from_tensor_slices(train_files)
        train_dataset = train_dataset.shuffle(buffer_size=len(train_files)).repeat()
        if 'npy' in args.train_glob:  # reading numpy arrays directly instead of from images
            train_dataset = train_dataset.map(  # https://stackoverflow.com/a/49459838
                lambda item: tuple(tf.numpy_function(read_npy_file_helper, [item], [tf.float32, ])),
                num_parallel_calls=args.preprocess_threads)
        else:
            train_dataset = train_dataset.map(
                read_png, num_parallel_calls=args.preprocess_threads)
        train_dataset = train_dataset.map(lambda x: tf.random_crop(x, (args.patchsize, args.patchsize, 3)))
        train_dataset = train_dataset.batch(args.batchsize)
        train_dataset = train_dataset.prefetch(32)

    # num_pixels = args.batchsize * args.patchsize ** 2

    # Get training patch from dataset.
    x = train_dataset.make_one_shot_iterator().get_next()
    res = build_train_graph(args, x)
    train_loss = res['train_loss']
    train_op = res['train_op']
    model_name = res['model_name']

    # boilerplate code for logging
    runname = get_runname(vars(args), record_keys=('num_filters', 'num_hfilters', 'lmbda'), prefix=model_name)
    save_dir = os.path.join(args.checkpoint_dir, runname)
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)
    import json
    import datetime
    with open(os.path.join(save_dir, 'record.txt'), 'a') as f:  # keep more detailed record in text file
        f.write(datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") + '\n')
        f.write(json.dumps(vars(args), indent=4, sort_keys=True) + '\n')
        f.write('\n')
    with open(os.path.join(save_dir, 'args.json'), 'w') as f:  # will overwrite existing
        json.dump(vars(args), f, indent=4, sort_keys=True)

    # save a copy of the script that defined the model
    from shutil import copy
    copied_path = copy(model_name + '.py', save_dir)
    print('Saved a copy of %s.py to %s' % (model_name, copied_path))

    hooks = [
        tf.train.StopAtStepHook(last_step=args.last_step),
        tf.train.NanTensorHook(train_loss),
    ]

    save_summary_secs = args.save_summary_secs
    if args.logdir != '':
        for key in res:
            if 'bpp' in key or 'loss' in key or key in ('mse', 'psnr'):
                tf.summary.scalar(key, res[key])
            elif key in ('original', 'reconstruction'):
                tf.summary.image(key, res[key], max_outputs=2)

        summary_op = tf.summary.merge_all()
        tf_log_dir = os.path.join(args.logdir, runname)
        summary_hook = tf.train.SummarySaverHook(save_secs=save_summary_secs, output_dir=tf_log_dir,
                                                 summary_op=summary_op)
        hooks.append(summary_hook)

    with tf.train.MonitoredTrainingSession(
            hooks=hooks, checkpoint_dir=save_dir,
            save_checkpoint_secs=args.save_checkpoint_secs, save_summaries_secs=save_summary_secs) as sess:
        while not sess.should_stop():
            sess.run(train_op)
コード例 #15
0
def main(unused_argv=None):
  os.environ["CUDA_VISIBLE_DEVICES"] = str(FLAGS.gpu_number)
  source_path = utils.shell_path(FLAGS.source_path)
  checkpoint_path = utils.shell_path(FLAGS.checkpoint_path)
  save_path = utils.shell_path(FLAGS.save_path)
  if not save_path:
    raise ValueError("Must specify a save_path.")
  tf.logging.set_verbosity(FLAGS.log)

  # Use directory of files
  if tf.gfile.IsDirectory(source_path):
    files = tf.gfile.ListDirectory(source_path)
    file_extensions = [os.path.splitext(f)[1] for f in files]
    if ".wav" in file_extensions:
      file_extension = ".wav"
    elif ".npy" in file_extensions:
      file_extension = ".npy"
    else:
      raise RuntimeError("Folder must contain .wav or .npy files.")
    file_extension = ".npy" if FLAGS.npy_only else file_extension
    files = sorted([
        os.path.join(source_path, fname)
        for fname in files
        if fname.lower().endswith(file_extension)
    ])
  # Use a single file
  elif source_path.lower().endswith((".wav", ".npy")):
    file_extension = os.path.splitext(source_path.lower())[1]
    files = [source_path]
  else:
    raise ValueError(
        "source_path {} must be a folder or file.".format(source_path))

  # Now synthesize from files one batch at a time
  batch_size = FLAGS.batch_size
  sample_length = FLAGS.sample_length
  n = len(files)
  for start in range(0, n, batch_size):
    end = start + batch_size
    batch_files = files[start:end]
    save_names = [
        os.path.join(save_path,
                     "gen_" + os.path.splitext(os.path.basename(f))[0] + ".wav")
        for f in batch_files
    ]
    # Encode waveforms
    if file_extension == ".wav":
      batch_data = fastgen.load_batch_audio(
          batch_files, sample_length=sample_length)
      encodings = fastgen.encode(
          batch_data, checkpoint_path, sample_length=sample_length)
    # Or load encodings
    else:
      encodings = fastgen.load_batch_encodings(
          batch_files, sample_length=sample_length)
    # Synthesize multi-gpu
    if FLAGS.gpu_number != 0:
      with tf.device("/device:GPU:%d" % FLAGS.gpu_number):
        fastgen.synthesize(
            encodings, save_names, checkpoint_path=checkpoint_path)
    # Single gpu
    else:
      fastgen.synthesize(
          encodings, save_names, checkpoint_path=checkpoint_path)
コード例 #16
0
def provide_one_hot_labels(self, batch_size):
    """Returns a batch of one-hot labels."""
    with tf.name_scope('inputs'):
        with tf.device('/cpu:0'):
            return self.dataset.provide_one_hot_labels(
                batch_size=batch_size)
コード例 #17
0
def main(args):
    tf.logging.set_verbosity(tf.logging.ERROR)
    np.set_printoptions(linewidth=200)
    random_seed = args.random_seed
    checkpoint_path = os.path.join(tempfile.mkdtemp(), "model.ckpt")

    # Input activations for the attention layer
    random_gen = np.random.default_rng(seed=random_seed)
    activations_np = random_gen.uniform(-0.1,
                                        0.1,
                                        size=(args.batch_size,
                                              args.source_sequence_length,
                                              args.hidden_length))

    # Configure the IPU
    cfg = ipu.utils.create_ipu_config(profiling=args.profile,
                                      report_directory="./report/")
    cfg = ipu.utils.auto_select_ipus(cfg, 1)
    ipu.utils.configure_ipu_system(cfg)

    # Build IPU graphs
    sparse_decoder_graph = tf.Graph()
    sparse_transformer = DynsparseTransformer(args)
    with sparse_decoder_graph.as_default():
        with tf.device("cpu"):
            # placeholder for activations
            # weight placeholders are created inside sparse_transformer
            inputs_ph = tf.placeholder(args.dtype, activations_np.shape)
        with ipu.scopes.ipu_scope("/device:IPU:0"):
            sparse_decoder = partial(sparse_transformer_fwd_and_grad,
                                     sparse_transformer)
            sparse_decoder_fetches = ipu.ipu_compiler.compile(
                sparse_decoder, [inputs_ph])
            ipu.utils.move_variable_initialization_to_cpu()

    # sparse-decoder
    with tf.Session(graph=sparse_decoder_graph) as sess:
        # initialize weights
        sess.run(tf.global_variables_initializer())

        # Save the sparse weights to checkpoint as dense
        sparse_transformer.checkpointAsDense(checkpoint_path)

        # run sparse decoder
        sparse_result = sess.run(sparse_decoder_fetches,
                                 feed_dict={inputs_ph: activations_np})

    # Create a dense transformer and initialize the weights to the values that
    # the sparse model was initialized with originally
    dense_decoder_graph = tf.Graph()
    dense_transformer = DenseTransformer(args)
    with dense_decoder_graph.as_default():
        with tf.device("cpu"):
            # placeholder for activations
            # weights will get streamed from checkpoint
            inputs_ph = tf.placeholder(args.dtype, activations_np.shape)

        with ipu.scopes.ipu_scope("/device:IPU:0"):
            dense_decoder_fetches = partial(dense_transformer_fwd_and_grad,
                                            dense_transformer)
            dense_graph = ipu.ipu_compiler.compile(dense_decoder_fetches,
                                                   [inputs_ph])
            ipu.utils.move_variable_initialization_to_cpu()

        with tf.device("cpu"):
            # We will only load the trainable variables, not momentum etc.
            loader = tf.train.Saver(tf.trainable_variables())

    # dense-decoder
    with tf.Session(graph=dense_decoder_graph) as sess:
        # Initialize momentum variables, which are not part of the checkpoint
        sess.run(tf.global_variables_initializer())
        # Restore saved trainable variables
        loader.restore(sess, checkpoint_path)
        dense_result = sess.run(dense_graph,
                                feed_dict={inputs_ph: activations_np})

    # TEST
    rtol = 1e-05
    atol = 1e-05
    if args.dtype == tf.float16:
        rtol = 1e-04
        atol = 1e-02
    # Compare model output activations (actual vs. desired) -> (sparse vs. dense)
    np.testing.assert_allclose(sparse_result["output_activation"],
                               dense_result["output_activation"],
                               atol=atol,
                               rtol=rtol,
                               err_msg="Output activations do not match.")

    # Compare gradients of the output w.r.t. the input
    np.testing.assert_allclose(sparse_result["input_grad"],
                               dense_result["input_grad"],
                               atol=atol,
                               rtol=rtol,
                               err_msg="Grads wrt. inputs do not match")

    # Compare the dense_w and sparse grads of every sparse layer
    for name, sparse_layer in sparse_transformer.sparse_layers.items():
        # Compare the dense grads
        dense_grad = dense_result[name + "/weight" + "_grad"]
        sparse_grad_w = sparse_result[name + "_grad_w"]
        np.testing.assert_allclose(
            sparse_grad_w,
            dense_grad,
            atol=atol,
            rtol=rtol,
            err_msg=f"Dense grads for layer {name} do not match")

        # Compare the sparse grads
        sparse_grad_padded = sparse_result[name +
                                           "/sparse_layer/nz_values_grad"]
        sparse_grad_data = sparse.SparseRepresentation(
            sparse_layer.weights.get_metainfo(), sparse_grad_padded)
        i, j, sparse_grad = sparse.triplets_from_representation(
            sparse_layer.weights.spec, sparse_grad_data,
            sparse_layer.weights.matmul_options)

        # Convert dense grads to blocks
        block_size, _ = sparse_layer.get_nonzero_blocks_shape()
        nx, ny = (dense_grad.shape[0] // block_size,
                  dense_grad.shape[1] // block_size)
        strides = np.array(dense_grad.strides)  # strides are in bytes
        strides = tuple(strides * block_size) + tuple(strides)
        blocked_dense_grad = np.lib.stride_tricks.as_strided(
            dense_grad, (nx, ny, block_size, block_size), strides)
        if block_size == 1:
            blocked_dense_grad = np.squeeze(np.copy(blocked_dense_grad),
                                            axis=(-2, -1))
        np.testing.assert_allclose(
            sparse_grad,
            blocked_dense_grad[i, j],
            atol=atol,
            rtol=rtol,
            err_msg=f"Sparse grads for layer {name} do not match")

    print("All results match.")
    return sparse_result, dense_result
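
The dense-to-block conversion above relies on numpy stride tricks; here is a standalone illustration (not from the original project, values made up) of the same as_strided pattern on a 4x4 matrix with block_size=2:

import numpy as np

dense = np.arange(16, dtype=np.float32).reshape(4, 4)
block_size = 2
nx, ny = dense.shape[0] // block_size, dense.shape[1] // block_size
strides = np.array(dense.strides)  # strides are in bytes
strides = tuple(strides * block_size) + tuple(strides)
blocks = np.lib.stride_tricks.as_strided(
    dense, (nx, ny, block_size, block_size), strides)
print(blocks[0, 1])  # the top-right 2x2 block: [[2. 3.] [6. 7.]]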
コード例 #18
0
def train_eval_offline(
        # Basic args.
        log_dir,
        data_file,
        agent_module,
        env_name='HalfCheetah-v2',
        n_train=int(1e6),
        shuffle_steps=0,
        seed=0,
        use_seed_for_data=False,
        # Train and eval args.
        total_train_steps=int(1e6),
        summary_freq=100,
        print_freq=1000,
        save_freq=int(2e4),
        eval_freq=5000,
        n_eval_episodes=20,
        # Agent args.
        model_params=(((200, 200), ), 2),
        optimizers=(('adam', 0.001), ),
        batch_size=256,
        weight_decays=(0.0, ),
        update_freq=1,
        update_rate=0.005,
        discount=0.99,
):
    """Training a policy with a fixed dataset."""
    # Create tf_env to get specs.
    tf_env = train_eval_utils.env_factory(env_name)
    observation_spec = tf_env.observation_spec()
    action_spec = tf_env.action_spec()

    # Prepare data.
    logging.info('Loading data from %s ...', data_file)
    data_size = utils.load_variable_from_ckpt(data_file, 'data._capacity')
    with tf.device('/cpu:0'):
        full_data = dataset.Dataset(observation_spec, action_spec, data_size)
    data_ckpt = tf.train.Checkpoint(data=full_data)
    data_ckpt.restore(data_file)
    # Split data.
    n_train = min(n_train, full_data.size)
    logging.info('n_train %s.', n_train)
    if use_seed_for_data:
        rand = np.random.RandomState(seed)
    else:
        rand = np.random.RandomState(0)
    shuffled_indices = utils.shuffle_indices_with_steps(n=full_data.size,
                                                        steps=shuffle_steps,
                                                        rand=rand)
    train_indices = shuffled_indices[:n_train]
    train_data = full_data.create_view(train_indices)

    # Create agent.
    agent_flags = utils.Flags(observation_spec=observation_spec,
                              action_spec=action_spec,
                              model_params=model_params,
                              optimizers=optimizers,
                              batch_size=batch_size,
                              weight_decays=weight_decays,
                              update_freq=update_freq,
                              update_rate=update_rate,
                              discount=discount,
                              train_data=train_data)
    agent_args = agent_module.Config(agent_flags).agent_args
    agent = agent_module.Agent(**vars(agent_args))
    agent_ckpt_name = os.path.join(log_dir, 'agent')

    # Restore agent from checkpoint if there exists one.
    if tf.io.gfile.exists('{}.index'.format(agent_ckpt_name)):
        logging.info('Checkpoint found at %s.', agent_ckpt_name)
        agent.restore(agent_ckpt_name)

    # Train agent.
    train_summary_dir = os.path.join(log_dir, 'train')
    eval_summary_dir = os.path.join(log_dir, 'eval')
    train_summary_writer = tf.compat.v2.summary.create_file_writer(
        train_summary_dir)
    eval_summary_writers = collections.OrderedDict()
    for policy_key in agent.test_policies.keys():
        eval_summary_writer = tf.compat.v2.summary.create_file_writer(
            os.path.join(eval_summary_dir, policy_key))
        eval_summary_writers[policy_key] = eval_summary_writer
    eval_results = []

    time_st_total = time.time()
    time_st = time.time()
    step = agent.global_step
    timed_at_step = step
    while step < total_train_steps:
        agent.train_step()
        step = agent.global_step
        if step % summary_freq == 0 or step == total_train_steps:
            agent.write_train_summary(train_summary_writer)
        if step % print_freq == 0 or step == total_train_steps:
            agent.print_train_info()
        if step % eval_freq == 0 or step == total_train_steps:
            time_ed = time.time()
            time_cost = time_ed - time_st
            logging.info('Training at %.4g steps/s.',
                         (step - timed_at_step) / time_cost)
            eval_result, eval_infos = train_eval_utils.eval_policies(
                tf_env, agent.test_policies, n_eval_episodes)
            eval_results.append([step] + eval_result)
            logging.info('Testing at step %d:', step)
            for policy_key, policy_info in eval_infos.items():
                logging.info(
                    utils.get_summary_str(step=None,
                                          info=policy_info,
                                          prefix=policy_key + ': '))
                utils.write_summary(eval_summary_writers[policy_key], step,
                                    policy_info)
            time_st = time.time()
            timed_at_step = step
        if step % save_freq == 0:
            agent.save(agent_ckpt_name)
            logging.info('Agent saved at %s.', agent_ckpt_name)

    agent.save(agent_ckpt_name)
    time_cost = time.time() - time_st_total
    logging.info('Training finished, time cost %.4gs.', time_cost)
    return np.array(eval_results)
コード例 #19
0
ファイル: e4.py プロジェクト: faymek/compression
def test_train(args):
    """Trains the model."""

    if args.verbose:
        tf.logging.set_verbosity(tf.logging.INFO)

    # Create input data pipeline.
    with tf.device("/cpu:0"):
        train_files = glob.glob(args.train_glob)
        if not train_files:
            raise RuntimeError(
                "No training images found with glob '{}'.".format(
                    args.train_glob))
        train_dataset = tf.data.Dataset.from_tensor_slices(train_files)
        train_dataset = train_dataset.shuffle(
            buffer_size=len(train_files)).repeat()
        train_dataset = train_dataset.map(
            read_png, num_parallel_calls=args.preprocess_threads)
        train_dataset = train_dataset.map(
            lambda x: tf.random_crop(x, (args.patchsize, args.patchsize, 3)))
        train_dataset = train_dataset.batch(args.batchsize)
        train_dataset = train_dataset.prefetch(32)

    num_pixels = args.batchsize * args.patchsize**2

    # Get training patch from dataset.
    x = train_dataset.make_one_shot_iterator().get_next()

    # Instantiate model.
    analysis_transform = AnalysisTransform(args.num_filters)
    synthesis_transform = SynthesisTransform(args.num_filters)
    hyper_analysis_transform = HyperAnalysisTransform(args.num_filters)
    hyper_synthesis_transform = HyperSynthesisTransform(args.num_filters)
    entropy_bottleneck = DynamicEntropyBottleneck(name="entropy_bottleneck")

    # Build autoencoder and hyperprior.
    y = analysis_transform(x)
    z = hyper_analysis_transform(abs(y))
    z_tilde, z_likelihoods = entropy_bottleneck(z, training=True)
    sigma = hyper_synthesis_transform(z_tilde)
    scale_table = np.exp(
        np.linspace(np.log(SCALES_MIN), np.log(SCALES_MAX), SCALES_LEVELS))
    conditional_bottleneck = tfc.GaussianConditional(sigma, scale_table)
    y_tilde, y_likelihoods = conditional_bottleneck(y, training=True)

    rand_rate = tf.random_uniform([], minval=0.0, maxval=0.75)  # drop rate
    random_tensor = tf.random_uniform([256], dtype=tf.float32)
    keep_prob = 1 - rand_rate
    scale = 1 / keep_prob
    keep_mask = random_tensor >= rand_rate
    y_tilde_drop = y_tilde * scale * tf.cast(keep_mask, tf.float32)
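    # The block above implements inverted dropout over the 256 latent channels:
    # each channel is dropped with probability rand_rate, survivors are rescaled by 1/keep_prob.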

    x_tilde = synthesis_transform(y_tilde_drop)

    # Total number of bits divided by number of pixels.
    train_bpp = (tf.reduce_sum(tf.log(y_likelihoods)) + tf.reduce_sum(
        tf.log(z_likelihoods))) / (-np.log(2) * num_pixels)

    # Mean squared error across pixels.
    train_mse = tf.reduce_mean(tf.squared_difference(x, x_tilde))
    # Multiply by 255^2 to correct for rescaling.
    train_mse *= 255**2

    # The rate-distortion cost.
    train_loss = args.lmbda * train_mse + train_bpp

    with tf.Session() as sess:
        latest = tf.train.latest_checkpoint(checkpoint_dir="./tfc256-05")
        tf.train.Saver().restore(sess, save_path=latest)

    step = tf.train.create_global_step()
    main_optimizer = tf.train.AdamOptimizer(learning_rate=1e-4)
    aux_optimizer = tf.train.AdamOptimizer(learning_rate=1e-3)

    main_step = main_optimizer.minimize(train_loss, global_step=step)
    aux_step = aux_optimizer.minimize(entropy_bottleneck.losses[0])

    train_op = tf.group(main_step, aux_step, entropy_bottleneck.updates[0])

    tf.summary.scalar("loss", train_loss)
    tf.summary.scalar("bpp", train_bpp)
    tf.summary.scalar("mse", train_mse)

    tf.summary.image("original", quantize_image(x))
    tf.summary.image("reconstruction", quantize_image(x_tilde))

    hooks = [
        tf.train.StopAtStepHook(last_step=args.last_step),
        tf.train.NanTensorHook(train_loss),
    ]
    with tf.train.MonitoredTrainingSession(hooks=hooks,
                                           checkpoint_dir=args.checkpoint_dir,
                                           save_checkpoint_secs=300,
                                           save_summaries_secs=60) as sess:
        while not sess.should_stop():
            sess.run(train_op)
コード例 #20
0
ファイル: Worker1.py プロジェクト: shazi129/py_workspace
import time
import tensorflow.compat.v1 as tf

# Configuration of cluster 

worker_hosts = [ "9.134.80.230:9501",  "9.134.189.246:9501"]
ps_hosts = ["9.134.189.246:9500"]
cluster = tf.train.ClusterSpec({"worker": worker_hosts, "ps":ps_hosts})

server = tf.train.Server(cluster, job_name='worker', task_index=1)  # start this process as task 1 of the 'worker' job
with tf.device(tf.train.replica_device_setter()):
    w = tf.get_variable('w',(1),tf.float32,initializer=tf.constant_initializer(2))
    add = tf.add(w, 1)
    update = tf.assign(w, add)

with tf.Session(server.target) as sess:
    sess.run(tf.global_variables_initializer())
    for _ in range(100):
        print("==============================")
        print(sess.run(w))
        print(sess.run(update))
        time.sleep(1)
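
This worker script assumes a parameter-server process is already listening on ps_hosts; a minimal companion sketch (hypothetical file, not part of the original project) for that ps task:

import tensorflow.compat.v1 as tf

worker_hosts = ["9.134.80.230:9501", "9.134.189.246:9501"]
ps_hosts = ["9.134.189.246:9500"]
cluster = tf.train.ClusterSpec({"worker": worker_hosts, "ps": ps_hosts})

# The ps task only hosts the shared variable; join() blocks and serves requests forever.
server = tf.train.Server(cluster, job_name="ps", task_index=0)
server.join()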
コード例 #21
0
ファイル: midime_train.py プロジェクト: xzm2004260/midiMe
def train(train_dir,
          config,
          dataset_fn,
          checkpoints_to_keep=5,
          keep_checkpoint_every_n_hours=1,
          num_steps=None,
          master='',
          num_sync_workers=0,
          num_ps_tasks=0,
          task=0):
    """Train loop."""
    tf.gfile.MakeDirs(train_dir)
    is_chief = (task == 0)
    if is_chief:
        _trial_summary(config.hparams, config.train_examples_path
                       or config.tfds_name, train_dir)

    with tf.Graph().as_default():
        with tf.device(
                tf.train.replica_device_setter(num_ps_tasks,
                                               merge_devices=True)):
            model = config.model
            model.build(config.hparams,
                        config.data_converter.output_depth,
                        encoder_train=config.encoder_train,
                        decoder_train=config.decoder_train)
            optimizer = model.train(**_get_input_tensors(dataset_fn(), config))
            restored_vars = _get_restore_vars(config.var_train_pattern)
            _set_trainable_vars(config.var_train_pattern)

            hooks = []
            if num_sync_workers:
                optimizer = tf.train.SyncReplicasOptimizer(
                    optimizer, num_sync_workers)
                hooks.append(optimizer.make_session_run_hook(is_chief))

            grads, var_list = zip(*optimizer.compute_gradients(model.loss))
            global_norm = tf.global_norm(grads)
            tf.summary.scalar('global_norm', global_norm)

            if config.hparams.clip_mode == 'value':
                g = config.hparams.grad_clip
                clipped_grads = [
                    tf.clip_by_value(grad, -g, g) for grad in grads
                ]
            elif config.hparams.clip_mode == 'global_norm':
                clipped_grads = tf.cond(
                    global_norm < config.hparams.grad_norm_clip_to_zero,
                    lambda: tf.clip_by_global_norm(grads,
                                                   config.hparams.grad_clip,
                                                   use_norm=global_norm)[0],
                    lambda: [tf.zeros(tf.shape(g)) for g in grads])
            else:
                raise ValueError('Unknown clip_mode: {}'.format(
                    config.hparams.clip_mode))
            train_op = optimizer.apply_gradients(zip(clipped_grads, var_list),
                                                 global_step=model.global_step,
                                                 name='train_step')

            logging_dict = {
                'global_step': model.global_step,
                'loss': model.loss
            }

            hooks.append(
                tf.train.LoggingTensorHook(logging_dict, every_n_iter=5))
            if num_steps:
                hooks.append(tf.train.StopAtStepHook(last_step=num_steps))

            variables_to_restore = contrib_framework.get_variables_to_restore(
                include=[v.name for v in restored_vars])
            init_assign_op, init_feed_dict = contrib_framework.assign_from_checkpoint(
                config.pretrained_path, variables_to_restore)

            def InitAssignFn(scaffold, sess):
                sess.run(init_assign_op, init_feed_dict)

            scaffold = tf.train.Scaffold(
                init_fn=InitAssignFn,
                saver=tf.train.Saver(
                    max_to_keep=checkpoints_to_keep,
                    keep_checkpoint_every_n_hours=keep_checkpoint_every_n_hours,
                ))
            contrib_training.train(train_op=train_op,
                                   logdir=train_dir,
                                   scaffold=scaffold,
                                   hooks=hooks,
                                   save_checkpoint_secs=60,
                                   master=master,
                                   is_chief=is_chief)
コード例 #22
0
def build_graph(model, hparams, scope=None):
  """build the computation graph."""
  utils.print_out("# creating %s graph ..." % model.mode)
  dtype = tf.float32
  num_layers = hparams.num_layers
  num_gpus = hparams.num_gpus

  with tf.variable_scope(scope or "dynamic_seq2seq", dtype=dtype):
    # Encoder
    # Look up embedding, emp_inp: [max_time, batch_size, num_units]
    with tf.variable_scope("encoder_emb_inp"):
      encoder_emb_inp = tf.nn.embedding_lookup(model.embedding_encoder,
                                               model.iterator.source)
      action_emb_inp = tf.nn.embedding_lookup(model.embedding_encoder,
                                              model.iterator.action)
    with tf.variable_scope("encoder1_intent"):
      res = _build_encoder_simple(
          model,
          model.iterator.intent,
          model.iterator.intent_len,
          num_units=hparams.encoder_intent_unit)
      _, encoder_state1_aux, _ = res
    with tf.variable_scope("encoder2_kb"):
      res = _build_encoder_hierarchial(
          model, model.iterator.kb, num_units=hparams.encoder_kb_unit)
      _, encoder_state2_aux, _ = res

    with tf.variable_scope("encoder1"):
      model.encoder_input_projection1 = tf.layers.Dense(
          hparams.num_units, use_bias=False, name="encoder_1_input_projection")
      tiled_encoder_state1_aux = tf.reshape(
          encoder_state1_aux,
          [model.batch_size, 1, hparams.encoder_intent_unit])
      time_step = tf.shape(encoder_emb_inp)[1]
      tiled_encoder_state1_aux = tf.tile(tiled_encoder_state1_aux,
                                         [1, time_step, 1])
      concat1 = tf.concat([encoder_emb_inp, tiled_encoder_state1_aux],
                          2)  # emb_intent + num_units
      encoder1_input = model.encoder_input_projection1(concat1)
      encoder_outputs1, encoder_state1 = _build_encoder(
          model, encoder1_input, hparams)  # 1= customer, 2= agent

    with tf.variable_scope("encoder2"):
      model.encoder_input_projection2 = tf.layers.Dense(
          hparams.num_units, use_bias=False, name="encoder_2_input_projection")
      tiled_encoder_state2_aux = tf.reshape(
          encoder_state2_aux, [model.batch_size, 1, hparams.encoder_kb_unit])
      time_step = tf.shape(encoder_emb_inp)[1]
      tiled_encoder_state2_aux = tf.tile(tiled_encoder_state2_aux,
                                         [1, time_step, 1])
      concat2 = tf.concat([encoder_emb_inp, tiled_encoder_state2_aux],
                          2)  # emb_intent + num_units
      encoder2_input = model.encoder_input_projection2(concat2)
      encoder_outputs2, encoder_state2 = _build_encoder(model, encoder2_input,
                                                        hparams)

    ## Decoder
    with tf.variable_scope("decoder1"):
      res = _build_decoder(model, encoder_outputs1, encoder_state1, hparams,
                           vocab_utils.start_of_turn1,
                           vocab_utils.start_of_turn2, model.output_layer1,
                           encoder_state1_aux)
      logits_trian1, _, sample_id_train1, sample_id_infer1 = res

    with tf.variable_scope("decoder2"):
      res = _build_decoder(model, encoder_outputs2, encoder_state2, hparams,
                           vocab_utils.start_of_turn2,
                           vocab_utils.start_of_turn1, model.output_layer2,
                           encoder_state2_aux)
      logits_trian2, _, sample_id_train2, sample_id_infer2 = res

    with tf.variable_scope("decoder_action"):
      res = _build_decoder_action(
          model,
          encoder_state2,
          hparams,
          hparams.t1.encode(),  # dialogue ends with t2, action starts with t1
          hparams.t2.encode(),
          model.output_layer_action)
      logits_trian3, _, sample_id_train3, sample_id_infer3 = res

    with tf.variable_scope("value_network1"):
      res = _build_value_network(model, encoder_emb_inp, action_emb_inp,
                                 encoder_state1_aux, model.vn_project11,
                                 model.vn_project12, hparams)
      dialogue1_val, _ = res
    with tf.variable_scope("value_network2"):
      res = _build_value_network(model, encoder_emb_inp, action_emb_inp,
                                 encoder_state2_aux, model.vn_project21,
                                 model.vn_project22, hparams, True)
      dialogue2_val, action_val = res

      model.logits_trian1 = logits_trian1
      model.logits_trian2 = logits_trian2
      model.dialogue1_val = dialogue1_val
      model.dialogue2_val = dialogue2_val

    if model.mode in [
        tf.estimator.ModeKeys.TRAIN, tf.estimator.ModeKeys.EVAL,
        dialogue_utils.mode_self_play_mutable
    ]:
      with tf.device(model_helper.get_device_str(num_layers - 1, num_gpus)):
        sl_loss, sl_loss_arr = _compute_loss(model, logits_trian1,
                                             logits_trian2, logits_trian3)

      with tf.device(model_helper.get_device_str(num_layers - 1, num_gpus)):
        rl_loss_arr = _compute_loss_selfplay(
            model, logits_trian1, logits_trian2, logits_trian3, dialogue1_val,
            dialogue2_val, action_val)

    elif model.mode == tf.estimator.ModeKeys.PREDICT or model.mode == dialogue_utils.mode_self_play_immutable:
      sl_loss, sl_loss_arr, rl_loss_arr = None, None, None
    else:
      raise ValueError("mode not known")

    sample_id_arr_train = [sample_id_train1, sample_id_train2, sample_id_train3]
    sample_id_arr_infer = [sample_id_infer1, sample_id_infer2, sample_id_infer3]

    return sl_loss, sl_loss_arr, rl_loss_arr, sample_id_arr_train, sample_id_arr_infer
コード例 #23
0
ファイル: pipelining.py プロジェクト: muzzynine/examples-1
        print(
            "Profiling enabled, repeat count set to one and executing the program once."
        )

    # Create the data queues from/to IPU
    infeed_queue = ipu.ipu_infeed_queue.IPUInfeedQueue(dataset, "infeed")
    outfeed_queue = ipu.ipu_outfeed_queue.IPUOutfeedQueue("outfeed")

    # With batch size BS, gradient accumulation count GAC and repeat count RPT,
    # at every step n = (BS * GAC * RPT) examples are used.
    # So in order to evaluate at least N total examples, do ceil(N / n) steps
    num_train_examples = int(args.epochs * n_examples)
    examples_per_step = args.batch_size * args.gradient_accumulation_count * args.repeat_count
    steps = ((num_train_examples - 1) // examples_per_step) + 1
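    # Worked example: batch_size=32, gradient_accumulation_count=16, repeat_count=100 gives
    # examples_per_step = 51200, so 600000 training examples need ceil(600000 / 51200) = 12 steps.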

    with tf.device('cpu'):
        lr = tf.placeholder(np.float32, [])

    with ipu.scopes.ipu_scope("/device:IPU:0"):
        compiled_model = ipu.ipu_compiler.compile(model, inputs=[lr])

    outfeed_op = outfeed_queue.dequeue()

    ipu.utils.move_variable_initialization_to_cpu()
    init_op = tf.global_variables_initializer()

    # Configure the IPU.
    # With pipelining, IPU-level profiling is needed to correctly visualise the execution trace.
    # For pipelined models either SNAKE or HOOF IPU selection orders are advised;
    # the latter works best when the first and last stage are on the same IPU.
    # For more information, see the API section of the Targeting the IPU from TensorFlow document:
コード例 #24
0
ファイル: train.py プロジェクト: ykumards/magenta
def main(unused_argv=None):
    tf.logging.set_verbosity(FLAGS.log)

    if FLAGS.config is None:
        raise RuntimeError("No config name specified.")

    config = utils.get_module("wavenet." + FLAGS.config).Config(
        FLAGS.train_path)

    logdir = FLAGS.logdir
    tf.logging.info("Saving to %s" % logdir)

    with tf.Graph().as_default():
        total_batch_size = FLAGS.total_batch_size
        assert total_batch_size % FLAGS.worker_replicas == 0
        worker_batch_size = total_batch_size // FLAGS.worker_replicas

        # Run the Reader on the CPU
        cpu_device = "/job:localhost/replica:0/task:0/cpu:0"
        if FLAGS.ps_tasks:
            cpu_device = "/job:worker/cpu:0"

        with tf.device(cpu_device):
            inputs_dict = config.get_batch(worker_batch_size)

        with tf.device(
                tf.train.replica_device_setter(ps_tasks=FLAGS.ps_tasks,
                                               merge_devices=True)):
            global_step = tf.get_variable(
                "global_step", [],
                tf.int32,
                initializer=tf.constant_initializer(0),
                trainable=False)

            # pylint: disable=cell-var-from-loop
            lr = tf.constant(config.learning_rate_schedule[0])
            for key, value in config.learning_rate_schedule.items():
                lr = tf.cond(tf.less(global_step, key), lambda: lr,
                             lambda: tf.constant(value))
            # pylint: enable=cell-var-from-loop
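            # The loop above builds a piecewise-constant schedule: once global_step reaches a
            # schedule key, lr takes that key's value (keys are assumed to be in increasing order).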
            tf.summary.scalar("learning_rate", lr)

            # build the model graph
            outputs_dict = config.build(inputs_dict, is_training=True)
            loss = outputs_dict["loss"]
            tf.summary.scalar("train_loss", loss)

            worker_replicas = FLAGS.worker_replicas
            ema = tf.train.ExponentialMovingAverage(decay=0.9999,
                                                    num_updates=global_step)
            opt = tf.train.SyncReplicasOptimizer(
                tf.train.AdamOptimizer(lr, epsilon=1e-8),
                worker_replicas,
                total_num_replicas=worker_replicas,
                variable_averages=ema,
                variables_to_average=tf.trainable_variables())

            train_op = opt.minimize(loss,
                                    global_step=global_step,
                                    name="train",
                                    colocate_gradients_with_ops=True)

            session_config = tf.ConfigProto(allow_soft_placement=True)

            is_chief = (FLAGS.task == 0)
            local_init_op = opt.chief_init_op if is_chief else opt.local_step_init_op

            slim.learning.train(
                train_op=train_op,
                logdir=logdir,
                is_chief=is_chief,
                master=FLAGS.master,
                number_of_steps=config.num_iters,
                global_step=global_step,
                log_every_n_steps=250,
                local_init_op=local_init_op,
                save_interval_secs=300,
                sync_optimizer=opt,
                session_config=session_config,
            )
コード例 #25
0
    def __init__(self,
                 network_name,
                 initializer,
                 regularizer,
                 vocab_size,
                 embedding_size,
                 n_class,
                 batch_size,
                 filter_heights,
                 num_filters,
                 num_units,
                 layers=3,
                 *args,
                 **kwargs):
        self.network_name = network_name
        self.initializer = initializer
        self.regularizer = regularizer
        self.vocab_size = vocab_size
        self.n_class = n_class
        self.batch_size = batch_size
        self.filter_heights = filter_heights
        if isinstance(num_filters, list):
            # isinstance: returns True if num_filters is a list, False otherwise
            if len(self.filter_heights) != len(num_filters):
                raise Exception("filter_heights和num_filters必须长度一致")
            else:
                self.num_filters = num_filters
        elif isinstance(num_filters, int):
            self.num_filters = [num_filters for _ in self.filter_heights]
        else:
            raise Exception("参数num_filters只能是list列表或者int类型的数字!!!")
        self.embedding_size = embedding_size
        self.num_units = num_units
        self.layers = layers

        with tf.variable_scope(self.network_name,
                               initializer=self.initializer,
                               regularizer=self.regularizer):
            # 1. Placeholders for input, output, dropout, batch_size
            with tf.variable_scope("placeholders"):
                self.input = tf.placeholder(tf.int32, [None, None],
                                            name='input_x')
                self.output = tf.placeholder(tf.int32, [None], name='input_y')
                self.dropout_keep_prob = tf.placeholder_with_default(
                    1.0, shape=[], name='dropout_keep_prob')
                self.batch_size = tf.placeholder_with_default(
                    self.batch_size, shape=[], name='batch_size')
                # Compute each sequence's length in the batch (padding positions are 0)
                # [N,T] -> [N,T] -> [N,T] -> [N,]
                self.lengths = tf.reduce_sum(tf.sign(tf.abs(self.input)),
                                             axis=-1)

            # 1.5 Embedding Layer
            with tf.device('/cpu:0'), tf.name_scope("embedding"):
                self.embedding = tf.Variable(
                    # specify the initialization range
                    tf.random_uniform([self.vocab_size, self.embedding_size],
                                      -1.0, 1.0),
                    name="W")
                # embedded_chars has shape [batch_size, sequence_length, embedding_size], i.e. [N, T, E]
                self.embedded_chars = tf.nn.embedding_lookup(
                    self.embedding, self.input)
                # Expand the 3-D tensor to 4-D (conv2d expects 4-D input); the new dimension is added at axis -1:
                # [batch_size, sequence_length, embedding_size, channel], [N, T, E, 1]
                self.embedded_chars_expanded = tf.expand_dims(
                    self.embedded_chars, -1)

            # 2. Build CNN + LSTM output
            outputs = []
            num_filters_total = 0
            print(filter_heights, num_filters)
            with tf.variable_scope("cnn-rnn"):
                for idx, filter_height in enumerate(self.filter_heights):
                    with tf.variable_scope("conv-%s" % idx):
                        # Convolution Layer
                        num_filters_total += self.num_filters[idx]
                        # filter_height = how many words the filter spans (h), embedding_size = filter width (w);
                        # e.g. a 7*5*1 filter with 128 output channels produces 128 feature maps
                        filter_shape = [
                            filter_height, self.embedding_size, 1,
                            self.num_filters[idx]
                        ]
                        # truncated-normal (Gaussian) initialization
                        print(filter_shape)
                        W = tf.Variable(tf.truncated_normal(filter_shape,
                                                            stddev=0.01),
                                        name="W")
                        print(W)
                        # initialize the bias to the constant 0.1
                        b = tf.Variable(tf.constant(0.1, shape=[num_filters]),
                                        name="b")
                        print(b)
                        conv = tf.nn.conv2d(
                            self.embedded_chars_expanded,
                            W,
                            strides=[1, 1, 1, 1],
                            padding="VALID",  # 不做padding
                            name="conv")
                        # Apply nonlinearity: [N, H, W, C]
                        # N: number of samples (batch size)
                        # H: height after convolution: h = length - filter_height + 1
                        # W: 1
                        # C: self.num_filters[i]
                        h = tf.nn.relu(tf.nn.bias_add(conv, b), name="relu")

                    with tf.variable_scope("lstm-%s" % idx):
                        # 0. Build the LSTM inputs and lengths (the unpadded convolution changes the sequence length)
                        lengths = self.lengths - filter_height + 1
                        cell_inputs = tf.squeeze(
                            h, axis=2)  # [B,T,1,D] -> [B,T,D]

                        # 1. Build the RNN cells
                        def cell(units):
                            return tf.nn.rnn_cell.BasicLSTMCell(units)

                        cell_fw = tf.nn.rnn_cell.MultiRNNCell(cells=[
                            cell(self.num_units) for _ in range(self.layers)
                        ])
                        cell_bw = tf.nn.rnn_cell.MultiRNNCell(cells=[
                            cell(self.num_units) for _ in range(self.layers)
                        ])

                        # 2. Build the dynamic (bidirectional) RNN
                        (output_fw,
                         output_bw), _ = tf.nn.bidirectional_dynamic_rnn(
                             cell_fw=cell_fw,  # forward RNN cell
                             cell_bw=cell_bw,  # backward RNN cell
                             inputs=
                             cell_inputs,  # RNN input; dynamic RNN requires the shape [B,T,D]
                             sequence_length=lengths,  # sequence length of each input, shape [B,]
                             dtype=cell_inputs.dtype  # dtype of the RNN's initial state
                         )

                        # 3. Concatenate the results (for the backward LSTM the last-time-step
                        #    output is useless, so its first-time-step output is taken instead)
                        batch_size = tf.shape(output_fw)[0]  # batch size
                        indices_fw = tf.concat(
                            [
                                tf.reshape(tf.range(batch_size),
                                           shape=(-1, 1)),  # sample indices, [0, N-1]
                                tf.reshape(
                                    lengths - 1,
                                    shape=(-1, 1))  # index of each sample's last time step (its length - 1)
                            ],
                            axis=-1)
                        indices_bw = tf.concat(
                            [
                                tf.reshape(tf.range(batch_size),
                                           shape=(-1, 1)),  # sample indices, [0, N-1]
                                tf.reshape(tf.zeros_like(lengths - 1),
                                           shape=(-1, 1))  # for the backward direction use the first time step, index 0
                            ],
                            axis=-1)
                        # Gather the values at those indices, then concatenate
                        output = tf.concat(
                            (
                                tf.gather_nd(
                                    output_fw, indices_fw
                                ),  # gather by index, [B,U]: forward output at the last time step
                                tf.gather_nd(output_bw, indices_bw
                                             )  # gather by index, [B,U]: backward output at the first time step
                            ),
                            axis=-1)
                        outputs.append(output)

                # Merge the outputs of all filter heights
                output = tf.concat(outputs, -1)

                # d. Apply dropout
                h_drop = tf.nn.dropout(output,
                                       keep_prob=self.dropout_keep_prob)

            # 3. Build FC output
            with tf.variable_scope("fc"):
                in_units = h_drop.get_shape()[-1]
                w = tf.get_variable(name='w', shape=[in_units, self.n_class])
                b = tf.get_variable(name='b', shape=[self.n_class])
                self.scores = tf.nn.xw_plus_b(h_drop,
                                              weights=w,
                                              biases=b,
                                              name='scores')
                self.predictions = tf.argmax(self.scores,
                                             axis=1,
                                             name='predictions')

            # 4. Build Loss
            with tf.variable_scope("loss"):
                self.losses = tf.reduce_mean(
                    tf.nn.sparse_softmax_cross_entropy_with_logits(
                        labels=self.output, logits=self.scores))
                tf.losses.add_loss(self.losses)
                self.total_loss = tf.losses.get_total_loss(name='total_loss')
                tf.summary.scalar('total_loss', self.total_loss)
                tf.summary.scalar('loss', self.losses)

            # 5. Build Estimate eval
            with tf.variable_scope("accuracy"):
                correct_predictions = tf.equal(self.predictions,
                                               tf.cast(self.output, tf.int64))
                self.accuracy = tf.reduce_mean(tf.cast(correct_predictions,
                                                       tf.float32),
                                               name='accuracy')
                tf.summary.scalar('accuracy', self.accuracy)
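
The tf.gather_nd indexing above is the key trick for variable-length batches: pair each sample index with the position of its last valid time step (or time step 0 for the backward direction) and gather those rows. A minimal standalone sketch of that step; outputs_fw and seq_lengths are illustrative names, not identifiers from the example above.

import tensorflow as tf  # TF 1.x

outputs_fw = tf.random_normal([4, 7, 16])      # [B, T, U] forward RNN outputs (dummy values)
seq_lengths = tf.constant([7, 3, 5, 1])        # [B] valid length of each sequence

batch_idx = tf.range(tf.shape(outputs_fw)[0])                # [0, 1, 2, 3]
last_idx = tf.stack([batch_idx, seq_lengths - 1], axis=1)    # [B, 2] (sample, time) index pairs
last_fw = tf.gather_nd(outputs_fw, last_idx)                 # [B, U] output at the last valid step
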
Code Example #26
0
File: demo.py Project: yyht/language
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """The `model_fn` for TPUEstimator."""

        tf.logging.info("*** Features ***")
        for name in sorted(features.keys()):
            tf.logging.info("  name = %s, shape = %s", name,
                            features[name].shape)

        is_training = (mode == tf.estimator.ModeKeys.TRAIN)

        # Initialize sparse tensors.
        with tf.device("/cpu:0"):
            tf_e2m_data, tf_e2m_indices, tf_e2m_rowsplits = (
                search_utils.load_ragged_matrix("ent2ment", e2m_checkpoint))
            with tf.name_scope("RaggedConstruction"):
                e2m_ragged_ind = tf.RaggedTensor.from_row_splits(
                    values=tf_e2m_indices,
                    row_splits=tf_e2m_rowsplits,
                    validate=False)
                e2m_ragged_val = tf.RaggedTensor.from_row_splits(
                    values=tf_e2m_data,
                    row_splits=tf_e2m_rowsplits,
                    validate=False)

        tf_m2e_map = search_utils.load_database("coref",
                                                [mips_config.num_mentions],
                                                m2e_checkpoint,
                                                dtype=tf.int32)
        entity_ids = search_utils.load_database(
            "entity_ids", [qa_config.num_entities, qa_config.max_entity_len],
            entity_id_checkpoint,
            dtype=tf.int32)
        entity_mask = search_utils.load_database(
            "entity_mask", [qa_config.num_entities, qa_config.max_entity_len],
            entity_mask_checkpoint)

        _, predictions = create_model_fn(
            bert_config=bert_config,
            qa_config=qa_config,
            mips_config=mips_config,
            is_training=is_training,
            features=features,
            ent2ment_ind=e2m_ragged_ind,
            ent2ment_val=e2m_ragged_val,
            ment2ent_map=tf_m2e_map,
            entity_ids=entity_ids,
            entity_mask=entity_mask,
            use_one_hot_embeddings=use_one_hot_embeddings,
            summary_obj=summary_obj)

        tvars = tf.trainable_variables()

        scaffold_fn = None
        if init_checkpoint:
            assignment_map, _ = get_assignment_map_from_checkpoint(
                tvars,
                init_checkpoint,
                load_only_bert=qa_config.load_only_bert)
            if use_tpu:

                def tpu_scaffold():
                    tf.train.init_from_checkpoint(init_checkpoint,
                                                  assignment_map)
                    return tf.train.Scaffold()

                scaffold_fn = tpu_scaffold
            else:
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

        output_spec = None
        if mode == tf.estimator.ModeKeys.PREDICT:
            output_spec = contrib_tpu.TPUEstimatorSpec(mode=mode,
                                                       predictions=predictions,
                                                       scaffold_fn=scaffold_fn)
        else:
            raise ValueError("Only PREDICT mode is supported: %s" % (mode))

        return output_spec
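
For context, a hedged sketch of how a model_fn like this one is usually handed to TPUEstimator for prediction; my_model_fn, my_predict_input_fn, and the run-config values are assumptions, not part of this project.

from tensorflow.contrib import tpu as contrib_tpu

run_config = contrib_tpu.RunConfig(
    master=None,  # TPU master address when running on TPU; None for local CPU/GPU
    tpu_config=contrib_tpu.TPUConfig(iterations_per_loop=1000))

estimator = contrib_tpu.TPUEstimator(
    model_fn=my_model_fn,        # a closure like the model_fn above (assumed)
    config=run_config,
    use_tpu=False,               # fall back to CPU/GPU execution
    train_batch_size=32,
    predict_batch_size=8)

for prediction in estimator.predict(input_fn=my_predict_input_fn):  # assumed input_fn
    tf.logging.info(prediction)
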
Code Example #27
0
File: lda2vec.py Project: lantip/Malaya
    def __init__(
        self,
        num_unique_documents,
        vocab_size,
        num_topics,
        freqs,
        embedding_size=128,
        num_sampled=40,
        learning_rate=1e-3,
        lmbda=150.0,
        alpha=None,
        power=0.75,
        batch_size=32,
        clip_gradients=5.0,
        **kwargs
    ):
        device = get_device(**kwargs)
        _graph = tf.Graph()

        with _graph.as_default():
            with tf.device(device):
                moving_avgs = tf.train.ExponentialMovingAverage(0.9)
                self.batch_size = batch_size
                self.freqs = freqs

                self.X = tf.placeholder(tf.int32, shape=[None])
                self.Y = tf.placeholder(tf.int64, shape=[None])
                self.DOC = tf.placeholder(tf.int32, shape=[None])
                self.switch_loss = tf.Variable(0, trainable=False)
                train_labels = tf.reshape(self.Y, [-1, 1])
                sampler = tf.nn.fixed_unigram_candidate_sampler(
                    train_labels,
                    num_true=1,
                    num_sampled=num_sampled,
                    unique=True,
                    range_max=vocab_size,
                    distortion=power,
                    unigrams=self.freqs,
                )

                self.word_embedding = tf.Variable(
                    tf.random_uniform([vocab_size, embedding_size], -1.0, 1.0)
                )
                self.nce_weights = tf.Variable(
                    tf.truncated_normal(
                        [vocab_size, embedding_size],
                        stddev=tf.sqrt(1 / embedding_size),
                    )
                )
                self.nce_biases = tf.Variable(tf.zeros([vocab_size]))
                scalar = 1 / np.sqrt(num_unique_documents + num_topics)
                self.doc_embedding = tf.Variable(
                    tf.random_normal(
                        [num_unique_documents, num_topics],
                        mean=0,
                        stddev=50 * scalar,
                    )
                )
                self.topic_embedding = tf.get_variable(
                    'topic_embedding',
                    shape=[num_topics, embedding_size],
                    dtype=tf.float32,
                    initializer=tf.orthogonal_initializer(gain=scalar),
                )
                pivot = tf.nn.embedding_lookup(self.word_embedding, self.X)
                proportions = tf.nn.embedding_lookup(
                    self.doc_embedding, self.DOC
                )
                doc = tf.matmul(proportions, self.topic_embedding)
                doc_context = doc
                word_context = pivot
                context = tf.add(word_context, doc_context)
                loss_word2vec = tf.reduce_mean(
                    tf.nn.nce_loss(
                        weights=self.nce_weights,
                        biases=self.nce_biases,
                        labels=train_labels,  # nce_loss expects labels shaped [batch_size, num_true]
                        inputs=context,
                        num_sampled=num_sampled,
                        num_classes=vocab_size,
                        num_true=1,
                        sampled_values=sampler,
                    )
                )
                self.fraction = tf.Variable(
                    1, trainable=False, dtype=tf.float32
                )

                n_topics = self.doc_embedding.get_shape()[1].value
                log_proportions = tf.nn.log_softmax(self.doc_embedding)
                if alpha is None:
                    alpha = 1.0 / n_topics
                loss = (alpha - 1) * log_proportions
                prior = tf.reduce_sum(loss)

                loss_lda = lmbda * self.fraction * prior
                global_step = tf.Variable(
                    0, trainable=False, name='global_step'
                )
                self.cost = tf.cond(
                    global_step < self.switch_loss,
                    lambda: loss_word2vec,
                    lambda: loss_word2vec + loss_lda,
                )
                loss_avgs_op = moving_avgs.apply(
                    [loss_lda, loss_word2vec, self.cost]
                )
                with tf.control_dependencies([loss_avgs_op]):
                    optimizer = tf.train.AdamOptimizer(
                        learning_rate=learning_rate
                    )
                    gvs = optimizer.compute_gradients(self.cost)
                    capped_gvs = [
                        (
                            tf.clip_by_value(
                                grad, -clip_gradients, clip_gradients
                            ),
                            var,
                        )
                        for grad, var in gvs
                    ]
                    self.optimizer = optimizer.apply_gradients(capped_gvs)
                self.sess = generate_session(_graph, **kwargs)
                self.sess.run(tf.global_variables_initializer())
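
A hedged usage sketch, not taken from the Malaya source: assuming the class above is named LDA2Vec and that word_freqs and batch_iterator are supplied by the caller, training feeds (pivot word, target word, document id) triples into the placeholders and runs the optimizer.

model = LDA2Vec(num_unique_documents=1000, vocab_size=20000, num_topics=20,
                freqs=word_freqs)  # word_freqs: list of vocab_size unigram counts (assumed)

for step in range(10000):
    batch_x, batch_y, batch_doc = next(batch_iterator)  # assumed int arrays of equal length
    _, cost = model.sess.run(
        [model.optimizer, model.cost],
        feed_dict={model.X: batch_x, model.Y: batch_y, model.DOC: batch_doc})
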
Code Example #28
0
def main(unused_argv=None):
    with tf.Graph().as_default():
        # Force all input processing onto CPU in order to reserve the GPU for the
        # forward inference and back-propagation.
        device = '/cpu:0' if not FLAGS.ps_tasks else '/job:worker/cpu:0'
        with tf.device(
                tf.train.replica_device_setter(FLAGS.ps_tasks,
                                               worker_device=device)):
            inputs, _ = image_utils.imagenet_inputs(FLAGS.batch_size,
                                                    FLAGS.image_size)
            # Load style images and select one at random (for each graph execution, a
            # new random selection occurs)
            (style_images, style_labels,
             style_gram_matrices) = image_utils.style_image_inputs(
                    os.path.expanduser(FLAGS.style_dataset_file),
                    batch_size=FLAGS.batch_size,
                    image_size=FLAGS.image_size,
                    square_crop=True,
                    shuffle=True)

        with tf.device(tf.train.replica_device_setter(FLAGS.ps_tasks)):
            # Process style and weight flags
            num_styles = FLAGS.num_styles
            if FLAGS.style_coefficients is None:
                style_coefficients = [1.0 for _ in range(num_styles)]
            else:
                style_coefficients = ast.literal_eval(FLAGS.style_coefficients)
            if len(style_coefficients) != num_styles:
                raise ValueError(
                    'number of style coefficients differs from number of styles'
                )
            content_weights = ast.literal_eval(FLAGS.content_weights)
            style_weights = ast.literal_eval(FLAGS.style_weights)

            # Rescale style weights dynamically based on the current style image
            style_coefficient = tf.gather(tf.constant(style_coefficients),
                                          style_labels)
            style_weights = dict((key, style_coefficient * style_weights[key])
                                 for key in style_weights)

            # Define the model
            stylized_inputs = model.transform(inputs,
                                              alpha=FLAGS.alpha,
                                              normalizer_params={
                                                  'labels': style_labels,
                                                  'num_categories': num_styles,
                                                  'center': True,
                                                  'scale': True
                                              })

            # Compute losses.
            total_loss, loss_dict = learning.total_loss(
                inputs, stylized_inputs, style_gram_matrices, content_weights,
                style_weights)
            for key, value in loss_dict.items():
                tf.summary.scalar(key, value)

            # Adding Image summaries to the tensorboard.
            tf.summary.image('image/0_inputs', inputs, 3)
            tf.summary.image('image/1_styles', style_images, 3)
            tf.summary.image('image/2_styled_inputs', stylized_inputs, 3)

            # Set up training
            optimizer = tf.train.AdamOptimizer(FLAGS.learning_rate)
            train_op = slim.learning.create_train_op(
                total_loss,
                optimizer,
                clip_gradient_norm=FLAGS.clip_gradient_norm,
                summarize_gradients=False)

            # Function to restore VGG16 parameters.
            init_fn_vgg = slim.assign_from_checkpoint_fn(
                vgg.checkpoint_file(), slim.get_variables('vgg_16'))

            # Run training
            slim.learning.train(train_op=train_op,
                                logdir=os.path.expanduser(FLAGS.train_dir),
                                master=FLAGS.master,
                                is_chief=FLAGS.task == 0,
                                number_of_steps=FLAGS.train_steps,
                                init_fn=init_fn_vgg,
                                save_summaries_secs=FLAGS.save_summaries_secs,
                                save_interval_secs=FLAGS.save_interval_secs)
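
The ast.literal_eval calls above expect the weight flags to be Python literals passed as strings. A hedged illustration of plausible flag values; the VGG layer names here are assumptions, not this project's documented defaults.

import ast

content_weights = ast.literal_eval("{'vgg_16/conv3': 1.0}")
style_weights = ast.literal_eval(
    "{'vgg_16/conv1': 5e-4, 'vgg_16/conv2': 5e-4, 'vgg_16/conv3': 5e-4, 'vgg_16/conv4': 5e-4}")
style_coefficients = ast.literal_eval('[1.0, 1.0, 0.5]')  # one coefficient per style image
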
Code Example #29
0
File: train.py Project: christinazavou/pointnet
def train():
    with tf.Graph().as_default():
        with tf.device('/gpu:' + str(GPU_INDEX)):
            pointclouds_pl, labels_pl = placeholder_inputs(
                BATCH_SIZE, NUM_POINT)
            is_training_pl = tf.placeholder(tf.bool, shape=())

            # Note the global_step=batch argument to minimize() below: it tells the
            # optimizer to increment the 'batch' variable for you on every training step.
            batch = tf.Variable(0)
            bn_decay = get_bn_decay(batch)
            tf.summary.scalar('bn_decay', bn_decay)

            # Get model and loss
            pred = get_model(pointclouds_pl, is_training_pl, bn_decay=bn_decay)
            loss = get_loss(pred, labels_pl)
            tf.summary.scalar('loss', loss)

            correct = tf.equal(tf.argmax(pred, 2), tf.to_int64(labels_pl))
            accuracy = tf.reduce_sum(tf.cast(correct, tf.float32)) / float(
                BATCH_SIZE * NUM_POINT)
            tf.summary.scalar('accuracy', accuracy)

            # Get training operator
            learning_rate = get_learning_rate(batch)
            tf.summary.scalar('learning_rate', learning_rate)
            if OPTIMIZER == 'momentum':
                optimizer = tf.train.MomentumOptimizer(learning_rate,
                                                       momentum=MOMENTUM)
            elif OPTIMIZER == 'adam':
                optimizer = tf.train.AdamOptimizer(learning_rate)
            train_op = optimizer.minimize(loss, global_step=batch)

            # Add ops to save and restore all the variables.
            saver = tf.train.Saver()

        # Create a session
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        config.allow_soft_placement = True
        config.log_device_placement = True
        sess = tf.Session(config=config)

        # Add summary writers
        merged = tf.summary.merge_all()
        train_writer = tf.summary.FileWriter(os.path.join(LOG_DIR, 'train'),
                                             sess.graph)
        test_writer = tf.summary.FileWriter(os.path.join(LOG_DIR, 'test'))

        # Init variables
        init = tf.global_variables_initializer()
        sess.run(init, {is_training_pl: True})

        ops = {
            'pointclouds_pl': pointclouds_pl,
            'labels_pl': labels_pl,
            'is_training_pl': is_training_pl,
            'pred': pred,
            'loss': loss,
            'train_op': train_op,
            'merged': merged,
            'step': batch
        }

        for epoch in range(MAX_EPOCH):
            log_string('**** EPOCH %03d ****' % (epoch))
            sys.stdout.flush()

            train_one_epoch(sess, ops, train_writer)
            eval_one_epoch(sess, ops, test_writer)

            # Save the variables to disk.
            if epoch % 10 == 0:
                save_path = saver.save(sess,
                                       os.path.join(LOG_DIR, "model.ckpt"))
                log_string("Model saved in file: %s" % save_path)
Code Example #30
0
    def train_a_model(input_seq,
                      mask_seq,
                      label_seq,
                      vocab_size,
                      d_model,
                      head,
                      init_weights,
                      print_output=False):
        # Clear everything in the default graph so we can start fresh.
        tf.reset_default_graph()

        with tf.device(USED_DEVICE):
            # We want each session to have a different random seed, but we need each run to have the same random sequence.
            tf.set_random_seed(random.randint(0, 65535))

            batch_size = len(input_seq[0])
            seq_len = len(input_seq[0][0])

            sess = setup_tensorflow_session()
            (input_tensor, mask_tensor, output_tensor, disagreement_cost,
             logprob_tensor) = build_model(batch=batch_size,
                                           seq_len=seq_len,
                                           vocab_size=vocab_size,
                                           d_model=d_model,
                                           head=head)
            (label_tensor, train_op, loss,
             classification_loss) = build_train_graph(
                 output_tensor=output_tensor,
                 batch=batch_size,
                 seq_len=seq_len,
                 d_model=d_model,
                 additional_costs=[disagreement_cost])
            sess.run(tf.global_variables_initializer())

            if init_weights is not None:
                set_all_variables(sess, init_weights)

            for i in range(LOCAL_TRAIN_EPOCH):
                avg_loss = 0.0
                avg_disagreement_loss = 0.0
                avg_classification_loss = 0.0
                avg_accuracy = 0.0
                for input_sample, mask_sample, label_sample in zip(
                        input_seq, mask_seq, label_seq):
                    [
                        output_vals, loss_vals, disagreement_cost_vals,
                        classification_loss_vals, logprob_vals, _
                    ] = sess.run(
                        [
                            output_tensor, loss, disagreement_cost,
                            classification_loss, logprob_tensor, train_op
                        ],
                        feed_dict={
                            input_tensor: input_sample,
                            mask_tensor: mask_sample,
                            label_tensor: label_sample
                        })
                    avg_loss = avg_loss + loss_vals
                    avg_disagreement_loss = avg_disagreement_loss + disagreement_cost_vals
                    avg_classification_loss = avg_classification_loss + classification_loss_vals
                    labels = np.array(label_sample)
                    predictions = (logprob_vals >= 0.5).astype(int)
                    scores = (predictions == labels).astype(int)
                    scores = np.average(scores)
                    avg_accuracy = avg_accuracy + scores
                avg_loss = avg_loss / len(input_seq)
                avg_disagreement_loss = avg_disagreement_loss / len(input_seq)
                avg_classification_loss = avg_classification_loss / len(
                    input_seq)
                avg_accuracy = avg_accuracy / len(input_seq)
                if print_output:
                    print('EPOCH: ' + str(i))

            if print_output:
                print('=== Input Values ===')
                print(input_seq)
                print('=== Label Values ===')
                print(label_seq)
                print('=== Output Values ===')
                print(output_vals)
                print('=== Loss Values ===')
                print(avg_loss)
                print('=== Classification Loss Values ===')
                print(avg_classification_loss)
                print('=== Disagreement Loss Values ===')
                print(avg_disagreement_loss)
                print('=== Accuracy ===')
                print(avg_accuracy)

            trained_weights = get_all_variables(sess)
            return [
                avg_loss, avg_disagreement_loss, avg_classification_loss,
                avg_accuracy, trained_weights
            ]
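
A small numpy illustration of the threshold-and-compare accuracy step used inside the training loop above, with made-up values.

import numpy as np

logprob_vals = np.array([0.91, 0.32, 0.57, 0.08])
label_sample = np.array([1, 0, 0, 0])

predictions = (logprob_vals >= 0.5).astype(int)                    # -> [1, 0, 1, 0]
accuracy = np.average((predictions == label_sample).astype(int))   # -> 0.75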