Example #1
def create_single_experiment_figures(src: str, dst: str) -> None:
    """Create single experiment figures for the Rainbow results.

    Read in the raw tables from SRC and write out the figures to DST.
    """
    utils.configure_logging(clear=True)

    for topic, experiment_to_figure_configs in tqdm.tqdm(
            TOPIC_TO_FIGURE_CONFIG.items(), **settings.TQDM_KWARGS):
        for experiment, figure_configs in tqdm.tqdm(
                experiment_to_figure_configs.items(), **settings.TQDM_KWARGS):
            for config in tqdm.tqdm(figure_configs, **settings.TQDM_KWARGS):
                os.makedirs(
                    os.path.join(dst, topic, experiment, config.fig_name))

                src_path = os.path.join(src, topic, experiment,
                                        config.data_fname)
                # The raw tables are stored as CSV or JSON Lines files.
                data = (pd.read_csv(src_path) if src_path.endswith(".csv") else
                        pd.read_json(src_path, lines=True))
                for key, subdata in data.groupby(config.split_key):
                    fig, axes = config.plot_func(data=subdata,
                                                 **config.plot_kwargs)

                    dst_path = os.path.join(
                        dst,
                        topic,
                        experiment,
                        config.fig_name,
                        ".".join(list(key) + [config.fig_name, "png"]),
                    )
                    fig.savefig(dst_path)
                    plt.close(fig)
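The loop above touches only a handful of attributes on each figure config. For orientation, here is a hypothetical minimal definition consistent with those accesses; the project's actual class may differ, and the field comments are assumptions. Note that split_key is treated as a list of columns, so each groupby key is a tuple.

import dataclasses
from typing import Any, Callable, Dict, List

@dataclasses.dataclass
class FigureConfig:
    """Hypothetical config object with the fields the loop above reads."""
    fig_name: str                  # figure subdirectory / file name
    data_fname: str                # raw table file, ".csv" or JSON Lines
    split_key: List[str]           # columns to group the data by
    plot_func: Callable[..., Any]  # returns a (fig, axes) pair
    plot_kwargs: Dict[str, Any]    # extra keyword arguments for plot_func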
Example #2
    def test_does_remove_handlers_when_clear_is_true(self):
        # Clear all log handlers from the root to prepare for the test.
        handlers = logging.root.handlers[:]
        for handler in handlers:
            logging.root.removeHandler(handler)

        # Run the test.
        self.assertEqual(len(logging.root.handlers), 0)

        logging.basicConfig()
        basic_handler = logging.root.handlers[0]

        self.assertEqual(len(logging.root.handlers), 1)

        handler = utils.configure_logging(clear=True)

        self.assertNotIn(basic_handler, logging.root.handlers)
        self.assertIn(handler, logging.root.handlers)
        self.assertEqual(len(logging.root.handlers), 1)

        # Restore the log handlers.
        #   First, remove the existing handlers (iterating over a copy,
        #   since removeHandler mutates the list).
        for handler in logging.root.handlers[:]:
            logging.root.removeHandler(handler)
        #   Next, add back the initial handlers.
        for handler in handlers:
            logging.root.addHandler(handler)
Example #3
    def test_attaches_log_handler(self):
        n_handlers_before = len(logging.root.handlers)

        handler = utils.configure_logging()

        self.assertEqual(len(logging.root.handlers), n_handlers_before + 1)
        self.assertIn(handler, logging.root.handlers)

        logging.root.removeHandler(handler)
Example #4
def evaluate(
    mixture: str,
    results_dir: str,
    split: str,
    batch_size: int,
    model_parallelism: int,
    tpu_name: str,
    tpu_topology: str,
) -> None:
    """Evaluate the model located at RESULTS_DIR on MIXTURE."""
    utils.configure_logging(clear=True)

    # Validate arguments.

    if not results_dir.startswith("gs://"):
        raise ValueError(f"RESULTS_DIR ({results_dir}) must be a GCS path.")
    elif not tf.io.gfile.exists(results_dir):
        raise IOError(f"RESULTS_DIR ({results_dir}) doesn't exist.")

    # Run evaluation.

    model = t5.models.MtfModel(
        model_dir=results_dir,
        tpu=tpu_name,
        tpu_topology=tpu_topology,
        model_parallelism=model_parallelism,
        batch_size=batch_size,
        sequence_length={"inputs": 512, "targets": 512},
        learning_rate_schedule=None,
        save_checkpoints_steps=5000,
        keep_checkpoint_max=None,
        iterations_per_loop=100,
    )

    model.eval(
        mixture_or_task_name=mixture,
        checkpoint_steps="all",
        split=split,
    )
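A hypothetical invocation, for illustration; every argument value below is a placeholder rather than a real mixture, bucket, or TPU.

# Hypothetical call to evaluate; all values are placeholders.
evaluate(
    mixture="rainbow_mixture",         # a registered mixture name
    results_dir="gs://your-bucket/experiments/rainbow",  # must be a GCS path
    split="validation",
    batch_size=64,
    model_parallelism=8,
    tpu_name="your-tpu",
    tpu_topology="v3-8",
)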
Example #5
def generate_latex_tables(src: str, dst: str) -> None:
    r"""Generate LaTeX tables from Rainbow's raw tables.

    Read the raw tables from SRC and write out LaTeX tables to DST. The
    generated tables assume the document has macros for \anli{},
    \cosmosqa{}, \hellaswag{}, \physicaliqa{}, \socialiqa{},
    \winogrande{}, \commonsenseqa{}, \joci{}, \rainbow{}, \glue{},
    \superglue{}, \none{}, \atomic{}, \conceptnet{}, and \together{}.
    """
    utils.configure_logging(clear=True)

    for experiment, tables_config in EXPERIMENT_TO_LATEX_TABLE_CONFIG.items():
        for name, config in tables_config.items():
            src_fpath = os.path.join(src, experiment, name, "table.csv")
            dst_dpath = os.path.join(dst, experiment, name)
            os.makedirs(dst_dpath)
            write_latex_tables_for_config(
                src_fpath=src_fpath,
                dst_dpath=dst_dpath,
                table_name=name,
                **config,
            )
Example #6
def prepare(src: str, dst: str, force_download: bool) -> None:
    """Prepare all relevant datasets for text-to-text modeling.

    Download the datasets to --src (if necessary), read them from there,
    transform them into CSVs suitable for text-to-text models, then
    write the results to --dst. Google Storage paths are supported.
    """
    utils.configure_logging(clear=True)

    # Validate the arguments.

    if tf.io.gfile.exists(dst):
        raise IOError(f"Destination directory ({dst}) already exists.")

    # Download and preprocess the datasets.

    # Every preparer exposes the same interface, so run them in sequence.
    preparers = [
        preparation.rainbow.RainbowPreparer(),
        preparation.atomic.AtomicPreparer(),
        preparation.conceptnet.ConceptNetPreparer(),
        preparation.commonsenseqa.CommonsenseQAPreparer(),
        preparation.joci.JOCIPreparer(),
        preparation.cyc.CycICPreparer(),
    ]
    for preparer in preparers:
        preparer.prepare(src=src, dst=dst, force_download=force_download)

    logger.info("All datasets have been prepared.")
Example #7
def fine_tune(
    mixture: str,
    results_dir: str,
    split: str,
    pretrained_model: str,
    n_steps: int,
    learning_rate: float,
    batch_size: int,
    model_parallelism: int,
    save_checkpoints_steps: int,
    n_checkpoints_to_keep: int,
    tpu_name: str,
    tpu_topology: str,
) -> None:
    """Fine-tune the model on MIXTURE, writing results to RESULTS_DIR."""
    utils.configure_logging(clear=True)

    # Validate arguments.

    if not results_dir.startswith("gs://"):
        raise ValueError(f"RESULTS_DIR ({results_dir}) must be a GCS path.")

    if pretrained_model.startswith("gs://"):
        if not tf.io.gfile.exists(pretrained_model):
            raise IOError(
                f"--pretrained-model ({pretrained_model}) does not exist."
            )
    else:
        if pretrained_model not in PRETRAINED_MODELS:
            raise ValueError(
                f"--pretrained-model ({pretrained_model}) not recognized. It"
                f" must either be a GCS path or one of"
                f' {", ".join(PRETRAINED_MODELS.keys())}.'
            )

    # Process arguments.

    if pretrained_model in PRETRAINED_MODELS:
        pretrained_model = PRETRAINED_MODELS[pretrained_model]

    # Run fine-tuning.

    model = t5.models.MtfModel(
        model_dir=results_dir,
        tpu=tpu_name,
        tpu_topology=tpu_topology,
        model_parallelism=model_parallelism,
        batch_size=batch_size,
        sequence_length={"inputs": 512, "targets": 512},
        learning_rate_schedule=learning_rate,
        save_checkpoints_steps=save_checkpoints_steps,
        keep_checkpoint_max=n_checkpoints_to_keep,
        iterations_per_loop=100,
    )

    model.finetune(
        mixture_or_task_name=mixture,
        pretrained_model_dir=pretrained_model,
        finetune_steps=n_steps,
        split=split,
    )
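PRETRAINED_MODELS maps short model names to checkpoint directories, as the validation logic above implies. A hypothetical mapping, for illustration; the real keys and paths may differ:

# Hypothetical mapping; keys and paths are illustrative only.
PRETRAINED_MODELS = {
    "small": "gs://your-bucket/pretrained/small",
    "base": "gs://your-bucket/pretrained/base",
    "large": "gs://your-bucket/pretrained/large",
}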
Example #8
    def test_verbose_defaults_to_false(self):
        handler = utils.configure_logging()

        self.assertEqual(handler.level, logging.INFO)

        logging.root.removeHandler(handler)
Example #9
    def test_verbose_false_sets_log_level_to_info(self):
        handler = utils.configure_logging(verbose=False)

        self.assertEqual(handler.level, logging.INFO)

        logging.root.removeHandler(handler)
Example #10
    def test_verbose_true_sets_log_level_to_debug(self):
        handler = utils.configure_logging(verbose=True)

        self.assertEqual(handler.level, logging.DEBUG)

        logging.root.removeHandler(handler)
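Taken together, Examples #2, #3, and #8-#10 pin down the observable behavior of utils.configure_logging: it attaches a handler to the root logger and returns it, clear=True first removes any existing handlers, and verbose toggles the handler's level between DEBUG and INFO. A minimal sketch consistent with those assertions, not the project's actual implementation:

import logging


def configure_logging(
        clear: bool = False, verbose: bool = False) -> logging.Handler:
    """Attach a handler to the root logger and return it (sketch only)."""
    if clear:
        # Iterate over a copy, since removeHandler mutates the list.
        for old_handler in logging.root.handlers[:]:
            logging.root.removeHandler(old_handler)

    handler = logging.StreamHandler()
    handler.setLevel(logging.DEBUG if verbose else logging.INFO)
    handler.setFormatter(
        logging.Formatter("%(asctime)s %(levelname)s: %(message)s"))
    logging.root.addHandler(handler)

    return handler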
Example #11
def generate_tables(src: str, dst: str) -> None:
    """Generate tables from Rainbow's experimental results.

    Read the experimental results for Rainbow from SRC and write tables
    for the experiments out to DST in CSV format. Full training curves
    are also written out in JSON Lines format.
    """
    utils.configure_logging(clear=True)

    for experiment, tables_config in EXPERIMENT_TO_TABLES_CONFIG.items():
        for name, (path, factors, ignore_dirs) in tables_config.items():
            # Parse the data using the config.
            training_curves = parse_training_curves(
                dpath=os.path.join(src, experiment, path),
                ignore_dirs=ignore_dirs,
                **factors,
            )
            # Convert the training curve data from nested dictionaries
            # into two lists of dictionaries, using a depth-first
            # search. The first list of dictionaries, table, provides a
            # table grouping each hyper-parameter configuration to the
            # best score from training (i.e., early stopping). The
            # second list of dictionaries, training_curves_table, groups
            # each hyper-parameter configuration with the full training
            # curve.
            table = []
            training_curves_table = []
            values = []
            stack = [
                (value, children, 0)
                for value, children in reversed(list(training_curves.items()))
            ]
            while stack:
                # Pop the node off the stack.
                value, children, depth = stack.pop()
                # Truncate the values to the correct depth.
                values = values[:depth]
                # Update values with the current value.
                values.append(value)
                # Handle the node.
                if isinstance(children, list):
                    # The node is a leaf (training curves).
                    best_score = max(score for _, score in children)
                    table.append(values + [best_score])
                    training_curves_table.append(values + [children])
                elif isinstance(children, dict):
                    # The node is an internal node.
                    for child_value, grandchildren in reversed(
                            list(children.items())):
                        stack.append((child_value, grandchildren, depth + 1))
            # Write out the data.
            os.makedirs(os.path.join(dst, experiment, name))
            # Write the training curves.
            training_curves_table_path = os.path.join(dst, experiment, name,
                                                      "training-curves.jsonl")
            with open(training_curves_table_path, "w") as fout:
                fieldnames = list(factors.keys()) + ["training_curve"]
                for row in training_curves_table:
                    # Identify the task.
                    task = next(value
                                for factor, value in zip(fieldnames, row)
                                if factor == "task")
                    # Skip rows where the requested training set size is
                    # at least as large as the available training data,
                    # except when size is 16000. In those cases, the
                    # training data used is the dataset's full training
                    # set. We keep only size 16000 to avoid repeated
                    # runs at the same effective training set size
                    # (simplifying comparisons across experiments).
                    try:
                        size = next(value
                                    for factor, value in zip(fieldnames, row)
                                    if factor == "size")
                    except StopIteration:
                        pass
                    else:
                        dataset = TASK_TO_DATASET[task]
                        if (int(size) >= dataset.splits["train"].size
                                and size != "16000"):
                            continue
                    # Write the training curve to disk.
                    fout.write(
                        json.dumps({
                            factor: process_factor(
                                task=task,
                                key=factor,
                                value=value,
                            )
                            for factor, value in zip(fieldnames, row)
                        }) + "\n")
            # Write the results table.
            table_path = os.path.join(dst, experiment, name, "table.csv")
            with open(table_path, "w") as fout:
                fieldnames = list(factors.keys()) + ["best_score"]
                writer = csv.DictWriter(f=fout,
                                        fieldnames=fieldnames,
                                        dialect="unix")

                writer.writeheader()
                for row in table:
                    # Identify the task.
                    task = next(value
                                for factor, value in zip(fieldnames, row)
                                if factor == "task")
                    # Apply the same size-based skip rule as for the
                    # training curves above.
                    try:
                        size = next(value
                                    for factor, value in zip(fieldnames, row)
                                    if factor == "size")
                    except StopIteration:
                        pass
                    else:
                        dataset = TASK_TO_DATASET[task]
                        if (int(size) >= dataset.splits["train"].size
                                and size != "16000"):
                            continue
                    # Write the table to disk.
                    writer.writerow({
                        factor: process_factor(
                            task=task,
                            key=factor,
                            value=value,
                        )
                        for factor, value in zip(fieldnames, row)
                    })
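For reference, the depth-first search above assumes parse_training_curves returns nested dictionaries keyed by factor values, bottoming out in lists of (x, score) pairs. An illustrative shape, with made-up factor names and values (interpreting the first pair element as a checkpoint step is an assumption):

# Illustrative only; the tasks, sizes, and scores are made up.
training_curves = {
    "anli": {                # task
        "16000": [           # size
            (1000, 0.71),    # (step, score) pairs form the leaf lists
            (2000, 0.74),
            (3000, 0.73),
        ],
    },
}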