def prepare_datasets(dataset_name=gin.REQUIRED,
                     shuffle_input_sentences=False,
                     num_eval_examples=2000,
                     batch_size=32):
  """Load TFDS splits and convert them into batched evaluation datasets.

  Args:
    dataset_name: Name of TFDS dataset.
    shuffle_input_sentences: Unused during evaluation; the argument exists
      only so the gin configuration stays compatible with training.
    num_eval_examples: Number of examples to use during evaluation. For the
      nolabel evaluation, this is also the number of distractors we choose
      between.
    batch_size: Batch size.

  Returns:
    A (datasets, emb_matrices) pair: a dict mapping each split name to a
    Dataset object, and a dict mapping the nolabel split names to their
    embedding matrices.
  """
  del shuffle_input_sentences  # Unused at eval time; kept for gin compat.

  splits_to_load = {
      'valid_nolabel': 'train[:2%]',
      'train_nolabel': 'train[2%:4%]',
      'valid2018': rocstories_sentence_embeddings.VALIDATION_2018,
      'valid2016': rocstories_sentence_embeddings.VALIDATION_2016,
  }
  datasets = tfds.load(
      dataset_name,
      data_dir=FLAGS.data_dir,
      split=splits_to_load,
      download=False)

  # Build train-style datasets (and their embedding matrices) for the two
  # nolabel splits. Shuffling is disabled and the size is capped at
  # num_eval_examples.
  emb_matrices = {}
  for split in ('valid_nolabel', 'train_nolabel'):
    datasets[split], emb_matrices[split] = utils.build_train_style_dataset(
        datasets[split],
        batch_size,
        False,
        num_examples=num_eval_examples,
        is_training=False)

  # Convert the official evaluation splits to validation data format. No
  # embedding matrices are involved here since each example offers only two
  # candidate next sentences; num_eval_examples is ignored and the full
  # splits are used.
  for split in ('valid2018', 'valid2016'):
    datasets[split] = utils.build_validation_dataset(datasets[split])

  return datasets, emb_matrices
def prepare_dataset(dataset_name=gin.REQUIRED,
                    shuffle_input_sentences=False,
                    num_eval_examples=2000,
                    batch_size=32):
  """Load the 2016 test split and format it for validation-style evaluation.

  Args:
    dataset_name: Name of TFDS dataset.
    shuffle_input_sentences: Unused here; the argument exists only for gin
      configuration compatibility.
    num_eval_examples: Unused here; present for gin compatibility.
    batch_size: Unused here; present for gin compatibility.

  Returns:
    A single Dataset object for the 2016 test split, in validation format.
  """
  # These arguments only matter in the training configuration.
  del batch_size
  del num_eval_examples
  del shuffle_input_sentences

  test_split = tfds.load(
      dataset_name,
      data_dir=FLAGS.data_dir,
      split=rocstories_sentence_embeddings.TEST_2016,
      download=False)
  return utils.build_validation_dataset(test_split)
def prepare_datasets(dataset_name=gin.REQUIRED,
                     shuffle_input_sentences=False,
                     num_eval_examples=2000,
                     batch_size=32):
  """Create batched, properly-formatted train/eval datasets from TFDS.

  Args:
    dataset_name: Name of TFDS dataset.
    shuffle_input_sentences: If True, the order of the input sentences is
      randomized.
    num_eval_examples: Number of examples to use during evaluation. For the
      nolabel evaluation, this is also the number of distractors we choose
      between.
    batch_size: Batch size.

  Returns:
    A (datasets, emb_matrices) pair: a dict mapping each split name to a
    Dataset object, and a dict mapping split names to the embedding
    matrices built for the train-style splits.
  """
  splits_to_load = {
      'valid_nolabel': 'train[:2%]',
      'train': 'train[2%:]',
      'train_nolabel': 'train[2%:4%]',
      'valid2018': rocstories_sentence_embeddings.VALIDATION_2018,
      'valid2016': rocstories_sentence_embeddings.VALIDATION_2016,
  }
  datasets = tfds.load(
      dataset_name,
      data_dir=FLAGS.data_dir,
      split=splits_to_load,
      download=False)

  emb_matrices = {}

  # Training split: convert to the expected training data format and keep
  # the embedding matrix that comes with it.
  datasets['train'], emb_matrices['train'] = utils.build_train_style_dataset(
      datasets['train'], batch_size, shuffle_input_sentences)

  # Evaluation-style nolabel splits: never shuffled, and capped at
  # num_eval_examples examples each.
  for split in ('valid_nolabel', 'train_nolabel'):
    datasets[split], emb_matrices[split] = utils.build_train_style_dataset(
        datasets[split], batch_size, False, num_examples=num_eval_examples)

  # Convert the official evaluation splits to validation data format. No
  # embedding matrices are involved here since each example offers only two
  # candidate next sentences; just cap the number of examples.
  for split in ('valid2018', 'valid2016'):
    datasets[split] = utils.build_validation_dataset(
        datasets[split]).take(num_eval_examples)

  logging.info('EMBEDDING MATRICES CREATED:')
  for key in emb_matrices:
    logging.info('%s: %s', key, emb_matrices[key].shape)

  return datasets, emb_matrices