# ===== Example #1 =====
# 0
def prepare_global_logging(params) -> None:
    """
    Configure 3 global logging attributes: streaming stdout and stderr
    to a file as well as the terminal, setting the formatting for the python
    logging library, and setting the interval frequency for the Tqdm progress bar.

    Note that this function does not set the logging level, which is set
    elsewhere by the entry-point script.

    Parameters
    ----------
    params : ``dict``, required.
        Must contain the following keys:

        ``serialization_dir`` : ``str``
            The directory to stream logs to.
        ``file_friendly_logging`` : ``bool``
            Whether logs should clean the output to prevent carriage returns
            (used to update progress bars on a single terminal line).
    """
    serialization_dir = params['serialization_dir']
    file_friendly_logging = params['file_friendly_logging']
    # Slow Tqdm's update interval so log files are not flooded with
    # carriage-return progress-bar refreshes.
    Tqdm.set_slower_interval(file_friendly_logging)
    std_out_file = os.path.join(serialization_dir, "stdout.log")
    # Tee stdout/stderr so every message reaches both the terminal and the
    # corresponding log file in the serialization directory.
    sys.stdout = TeeLogger(
        std_out_file,  # type: ignore
        sys.stdout,
        file_friendly_logging)
    sys.stderr = TeeLogger(
        os.path.join(serialization_dir, "stderr.log"),  # type: ignore
        sys.stderr,
        file_friendly_logging)

    logging.init_logger(log_file=std_out_file)
import os
import argparse

from miso.utils.params import Params
from miso.utils import logging
from miso.data.iterators import BucketIterator, BasicIterator
from miso.data.token_indexers import SingleIdTokenIndexer, TokenCharactersIndexer
from miso.data.dataset_readers import RAMSDatasetReader
from miso.data.dataset_readers import GVDBDatasetReader

logger = logging.init_logger()

def load_dataset_reader(dataset_type, *args, **kwargs):
    if dataset_type == "RAMS":
        dataset_reader = RAMSDatasetReader(
            max_trigger_span_width=kwargs.get('max_trigger_span_width'),
            max_arg_span_width=kwargs.get('max_arg_span_width'),
            use_gold_triggers=kwargs.get('use_gold_triggers'),
            use_gold_arguments=kwargs.get('use_gold_arguments'),
            annotation_mode=kwargs.get('annotation_mode'),
            language=kwargs.get('language'),
            genres=kwargs.get('genres'),
            token_indexers=dict(
                tokens=SingleIdTokenIndexer(namespace='tokens'),
                token_characters=TokenCharactersIndexer(namespace='characters')
            )
        )
    elif dataset_type == "GVDB":
        dataset_reader = GVDBDatasetReader(
            max_trigger_span_width=kwargs.get('max_trigger_span_width'),
            max_arg_span_width=kwargs.get('max_arg_span_width'),
# ===== Example #3 =====
# 0
from typing import Dict, Union, List, Set

import numpy
import torch

from miso.utils import logging
from miso.utils.checks import ConfigurationError
from miso.utils.params import Params
from miso.utils.nn import get_device_of, device_mapping
from miso.utils.environment import move_to_device
from miso.data.dataset import Batch
from miso.utils.params import remove_pretrained_embedding_params
from miso.data.vocabulary import Vocabulary
from miso import models as Models

logger = logging.init_logger(__name__)  # pylint: disable=invalid-name

# When training a model, many sets of weights are saved. By default we want to
# save/load this set of weights.
_DEFAULT_WEIGHTS = "best.th"


class Model(torch.nn.Module):
    """
    This abstract class represents a model to be trained. Rather than relying completely
    on the Pytorch Module, we modify the output spec of ``forward`` to be a dictionary.
    Models built using this API are still compatible with other pytorch models and can
    be used naturally as modules within other models - outputs are dictionaries, which
    can be unpacked and passed into other layers. One caveat to this is that if you
    wish to use an AllenNLP model inside a Container (such as nn.Sequential), you must
    interleave the models with a wrapper module which unpacks the dictionary into