def _embed(
    self, embed_input: gobbli.io.EmbedInput, context: ContainerTaskContext
) -> gobbli.io.EmbedOutput:
    """
    Generate embeddings by running ``use.py`` inside a container.

    The texts in ``embed_input.X`` are passed through
    ``escape_line_delimited_texts`` and written to the task's host input
    directory.  The container is then run with the input file, output file,
    module directory (``BaseModel._CONTAINER_WEIGHTS_PATH``) and batch size
    on its command line, and the embeddings are read back from the task's
    host output directory.

    Raises ``ValueError`` if ``embed_input.pooling`` is
    ``gobbli.io.EmbedPooling.NONE``.
    """
    # Guard clause: un-pooled output is rejected before any file is written.
    if embed_input.pooling == gobbli.io.EmbedPooling.NONE:
        raise ValueError(
            "Universal Sentence Encoder does sentence encoding, so pooling is required."
        )

    # The input file is written on the host side of the task directories.
    host_input_path = context.host_input_dir / USE._INPUT_FILE
    host_input_path.write_text(escape_line_delimited_texts(embed_input.X))

    # The command line refers to the container-side paths of the same files.
    container_input_path = context.container_input_dir / USE._INPUT_FILE
    container_output_path = context.container_output_dir / USE._OUTPUT_FILE
    cmd = " ".join(
        [
            "python use.py",
            f"--input-file={container_input_path}",
            f"--output-file={container_output_path}",
            f"--module-dir={BaseModel._CONTAINER_WEIGHTS_PATH}",
            f"--batch-size={embed_input.embed_batch_size}",
        ]
    )

    container_logs = run_container(
        self.docker_client,
        self.image_tag,
        cmd,
        self.logger,
        **self._base_docker_run_kwargs(context),
    )

    # Results are read back from the host-side output directory.
    embeddings = _read_embeddings(context.host_output_dir / USE._OUTPUT_FILE)
    return gobbli.io.EmbedOutput(
        X_embedded=embeddings, _console_output=container_logs
    )
def _write_embed_input(self, embed_input: gobbli.io.EmbedInput, input_dir: Path):
    """
    Write gobbli embedding input to ``input_dir`` in the format expected by BERT.

    ``input_dir`` (and any missing parents) is created if it does not exist.
    The texts in ``embed_input.X`` are passed through
    ``escape_line_delimited_texts`` and written to
    ``BERT._EMBEDDING_INPUT_FILE`` inside that directory.
    """
    # Create the directory up front; an already-existing one is not an error.
    input_dir.mkdir(parents=True, exist_ok=True)

    target_file = input_dir / BERT._EMBEDDING_INPUT_FILE
    escaped_texts = escape_line_delimited_texts(embed_input.X)
    target_file.write_text(escaped_texts)
def _write_labels(self, labels: List[str], labels_path: Path):
    """
    Write ``labels`` to the file at ``labels_path``.

    The labels are passed through ``escape_line_delimited_texts`` and the
    resulting text becomes the file's content.
    """
    escaped_labels = escape_line_delimited_texts(labels)
    labels_path.write_text(escaped_labels)
def _write_labels(labels: List[Any], output_file: Path):
    """
    Write the given labels to ``output_file``.

    The labels are passed through ``escape_line_delimited_texts`` and the
    resulting text becomes the file's content.
    """
    serialized_labels = escape_line_delimited_texts(labels)
    output_file.write_text(serialized_labels)
def _write_input(self, X: List[str], context: ContainerTaskContext):
    """
    Write the user's input texts to a file for the container to read.

    The texts are passed through ``escape_line_delimited_texts`` and written
    to ``BERTMaskedLM._INPUT_FILE`` inside ``context.host_input_dir``.
    """
    destination = context.host_input_dir / BERTMaskedLM._INPUT_FILE
    escaped_texts = escape_line_delimited_texts(X)
    destination.write_text(escaped_texts)