Code example #1
def _get_predictor(args: argparse.Namespace) -> Predictor:
    check_for_gpu(args.cuda_device)
    params = Params.from_file(args.extractor_config_file)

    model = Model.from_params(vocab=None, params=params.pop('model'))
    if args.cuda_device >= 0:
        model.to(args.cuda_device)
    else:
        # PyTorch treats to(None) as a no-op, so move to the CPU explicitly
        model.cpu()

    archive = Archive(model=model, config=params)

    return Predictor.from_archive(
        archive,
        args.predictor,
        dataset_reader_to_load=args.dataset_reader_choice)
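A minimal invocation sketch for the helper above; every argument value here is a placeholder, not taken from the original project:

# Hypothetical usage; the config path, predictor name, and reader choice are assumptions.
args = argparse.Namespace(
    cuda_device=-1,
    extractor_config_file="configs/extractor.jsonnet",
    predictor="sentence-tagger",
    dataset_reader_choice="validation",
)
predictor = _get_predictor(args)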
Code example #2
def predict(model: Model, model_params: Params,
            data_fp: Path) -> List[Dict[str, Any]]:
    '''
    :param model: Model to be used to generate the predictions
    :param model_params: An object that describes the model
    :param data_fp: Path of the data file to predict on
    :returns: The samples from the data file, with predictions added.
    '''
    model.eval()
    archive = Archive(model=model, config=model_params)
    predictor = Predictor.from_archive(archive, 'emotion-classifier')
    predicted_samples = get_predictions(data_fp,
                                        predictor,
                                        incl_labels=False,
                                        vocab=model.vocab)
    return predicted_samples
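A usage sketch, assuming the model is built from a Params file as in example #1; none of this setup comes from the original file:

# Hypothetical setup: keep an unconsumed copy of the config so predict() can
# wrap it in the Archive, since Model.from_params consumes the Params it pops.
params = Params.from_file("model_config.jsonnet")  # placeholder path
model = Model.from_params(vocab=None, params=params.duplicate().pop("model"))
predictions = predict(model, params, Path("data/test.jsonl"))  # placeholder path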
Code example #3
File: archival_utils.py  Project: yuvalkry/qfirst
def load_archive_from_folder(archive_file: str,
                             cuda_device: int = -1,
                             overrides: str = "",
                             weights_file: str = None) -> Archive:
    # redirect to the cache, if necessary
    resolved_archive_file = cached_path(archive_file)

    logger.info(f"loading model from direactory {archive_file}")

    serialization_dir = resolved_archive_file

    # Check for supplemental files in archive
    fta_filename = os.path.join(serialization_dir, _FTA_NAME)
    if os.path.exists(fta_filename):
        with open(fta_filename, 'r') as fta_file:
            files_to_archive = json.loads(fta_file.read())

        # Add these replacements to overrides
        replacements_dict: Dict[str, Any] = {}
        for key, filename in files_to_archive.items():
            if not filename.startswith("/"):
                filename = os.path.join(serialization_dir, f"fta/{key}")
            replacements_dict[key] = filename

        overrides_dict = parse_overrides(overrides)
        combined_dict = with_fallback(preferred=unflatten(replacements_dict), fallback=overrides_dict)
        overrides = json.dumps(combined_dict)

    # Load config
    config = Params.from_file(os.path.join(serialization_dir, CONFIG_NAME), overrides)
    config.loading_from_archive = True

    if weights_file:
        weights_path = weights_file
    else:
        weights_path = os.path.join(serialization_dir, _WEIGHTS_NAME)

    # Instantiate model. Use a duplicate of the config, as it will get consumed.
    model = Model.load(config.duplicate(),
                       weights_file=weights_path,
                       serialization_dir=serialization_dir,
                       cuda_device=cuda_device)

    return Archive(model=model, config=config)
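A usage sketch under placeholder paths; the predictor name is an assumption, not one registered by the project:

# Hypothetical usage: load from an unpacked serialization directory rather than a tar.gz.
archive = load_archive_from_folder("runs/qfirst-model", cuda_device=-1)
predictor = Predictor.from_archive(archive, "qfirst-predictor")  # name is assumed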
Code example #4
def _get_predictor(args: argparse.Namespace) -> Predictor:
    check_for_gpu(args.cuda_device)
    params = Params.from_file(args.scorer_config_file)
    archive = load_archive(
        args.archive_file,
        weights_file=args.weights_file,
        cuda_device=args.cuda_device,
        overrides=args.overrides,
    )

    model = Model.from_params(vocab=None, model=archive.model, params=params)
    # Guard the move: calling to(-1) raises in PyTorch when running on CPU.
    if args.cuda_device >= 0:
        model.to(args.cuda_device)

    archive = Archive(model=model, config=archive.config)

    return Predictor.from_archive(
        archive,
        args.predictor,
        dataset_reader_to_load=args.dataset_reader_choice)
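Unlike example #1, this variant first loads a trained model from an existing archive and passes it to Model.from_params as a constructor argument, so the model described by scorer_config_file can wrap the archived model; the fresh Archive then re-pairs that wrapper with the original archive's config before Predictor.from_archive is called.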
Code example #5
    inp_fn = args["--in"]
    batch_size = int(args["--batch-size"])
    out_fn = args["--out"]
    model_path = args["--model-path"]
    cuda_device = (int(args["--cuda-device"])
                   if args["--cuda-device"] is not None else -1)
    debug = args["--debug"]
    if debug:
        logging.basicConfig(level=logging.DEBUG)
    else:
        logging.basicConfig(level=logging.INFO)

    # Init OIE
    raw_model, config = openie_model(model_path)
    # insert nndct code
    archive = Archive(model=raw_model, config=config)
    model = Predictor.from_archive(archive, "open-information-extraction")
    # model = open_information_extraction_stanovsky_2018()

    # Move model to gpu, if requested
    if cuda_device >= 0:
        model._model.cuda(cuda_device)

    # Read input sentences, closing the file when done
    with open(inp_fn, encoding="utf8") as inp_file:
        lines = [line.strip() for line in inp_file]

    # process sentences
    logging.info("Processing sentences")
    oie_lines = []
    t1 = time.perf_counter()
    for chunk in tqdm(chunks(lines, batch_size)):
        oie_inputs = []
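The chunks helper driving the loop is not shown in this excerpt; a minimal sketch matching its calling convention, assuming plain fixed-size batching:

def chunks(lst, n):
    # Assumed helper (not from the original script): yield successive n-sized batches.
    for i in range(0, len(lst), n):
        yield lst[i:i + n]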
Code example #6
def _load_archive(archive_file: str,
                  adapters_dir: str,
                  cuda_device: int = -1,
                  overrides: str = "",
                  weights_file: str = None) -> Archive:
    """
    Instantiates an Archive from an archived `tar.gz` file.

    Parameters
    ----------
    archive_file: ``str``
        The archive file to load the model from.
    adapters_dir: ``str``
        The directory with adapter weights, passed through to ``_load``.
    weights_file: ``str``, optional (default = None)
        The weights file to use.  If unspecified, weights.th in the archive_file will be used.
    cuda_device: ``int``, optional (default = -1)
        If `cuda_device` is >= 0, the model will be loaded onto the
        corresponding GPU. Otherwise it will be loaded onto the CPU.
    overrides: ``str``, optional (default = "")
        JSON overrides to apply to the unarchived ``Params`` object.
    """

    # redirect to the cache, if necessary
    resolved_archive_file = cached_path(archive_file)

    if resolved_archive_file == archive_file:
        logger.info(f"loading archive file {archive_file}")
    else:
        logger.info(f"loading archive file {archive_file} from cache at {resolved_archive_file}")

    if os.path.isdir(resolved_archive_file):
        serialization_dir = resolved_archive_file
    else:
        # Extract archive to temp dir
        tempdir = tempfile.mkdtemp()
        logger.info(f"extracting archive file {resolved_archive_file} to temp dir {tempdir}")
        with tarfile.open(resolved_archive_file, 'r:gz') as archive:
            archive.extractall(tempdir)
        # Postpone cleanup until exit in case the unarchived contents are needed outside
        # this function.
        atexit.register(_cleanup_archive_dir, tempdir)

        serialization_dir = tempdir

    # Check for supplemental files in archive
    fta_filename = os.path.join(serialization_dir, "files_to_archive.json")
    if os.path.exists(fta_filename):
        with open(fta_filename, 'r') as fta_file:
            files_to_archive = json.loads(fta_file.read())

        # Add these replacements to overrides
        replacements_dict: Dict[str, Any] = {}
        for key, original_filename in files_to_archive.items():
            replacement_filename = os.path.join(serialization_dir, f"fta/{key}")
            if os.path.exists(replacement_filename):
                replacements_dict[key] = replacement_filename
            else:
                logger.warning(f"Archived file {replacement_filename} not found! At train time "
                               f"this file was located at {original_filename}. This may be "
                               "because you are loading a serialization directory. Attempting to "
                               "load the file from its train-time location.")

        overrides_dict = parse_overrides(overrides)
        combined_dict = with_fallback(preferred=overrides_dict, fallback=unflatten(replacements_dict))
        overrides = json.dumps(combined_dict)

    # Load config
    config = Params.from_file(os.path.join(serialization_dir, "config.json"), overrides)
    config.loading_from_archive = True

    if weights_file:
        weights_path = weights_file
    else:
        weights_path = os.path.join(serialization_dir, "weights.th")
        # Fallback for serialization directories.
        if not os.path.exists(weights_path):
            weights_path = os.path.join(serialization_dir, "best.th")


    # Instantiate model. Use a duplicate of the config, as it will get consumed.
    model = _load(config.duplicate(),
                  adapters_dir=adapters_dir,
                  weights_file=weights_path,
                  serialization_dir=serialization_dir,
                  cuda_device=cuda_device)

    return Archive(model=model, config=config)
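A usage sketch with placeholder paths; the predictor name is an assumption:

# Hypothetical usage of the adapter-aware loader above.
archive = _load_archive("model.tar.gz", adapters_dir="adapters/", cuda_device=-1)
predictor = Predictor.from_archive(archive, "my-predictor")  # name is assumed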
Code example #7

resolved_archive_file = cached_path(args.archive_file)
# Create temporary directory and extract archive file.
if os.path.isdir(resolved_archive_file):
    serialization_dir = resolved_archive_file
else:
    tempdir = tempfile.mkdtemp()
    with tarfile.open(resolved_archive_file, "r:gz") as archive:
        archive.extractall(tempdir)
    atexit.register(_cleanup_archive_dir, tempdir)
    serialization_dir = tempdir

config = Params.from_file(os.path.join(serialization_dir, "config.json"), "")
model = SemanticRoleLabeler.from_archive(args.archive_file)
archive = Archive(model=model, config=config)

prepare_environment(config)
model.eval()
validation_dataset_reader_params = config.pop("validation_dataset_reader",
                                              None)
if validation_dataset_reader_params is not None:
    dataset_reader = DatasetReader.from_params(
        validation_dataset_reader_params)
else:
    dataset_reader = DatasetReader.from_params(config.pop("dataset_reader"))
instances = dataset_reader.read(args.evaluation_data_path)
instances.index_with(model.vocab)
data_loader_params = config.pop("validation_data_loader", None)
if data_loader_params is None:
    data_loader_params = config.pop("data_loader")